1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <string> 6 7 #include "base/i18n/rtl.h" 8 #include "base/i18n/string_search.h" 9 #include "base/strings/string16.h" 10 #include "base/strings/utf_string_conversions.h" 11 #include "testing/gtest/include/gtest/gtest.h" 12 #include "third_party/icu/source/i18n/unicode/usearch.h" 13 14 namespace base { 15 namespace i18n { 16 17 // Note on setting default locale for testing: The current default locale on 18 // the Mac trybot is en_US_POSIX, with which primary-level collation strength 19 // string search is case-sensitive, when normally it should be 20 // case-insensitive. In other locales (including en_US which English speakers 21 // in the U.S. use), this search would be case-insensitive as expected. 22 23 TEST(StringSearchTest, ASCII) { 24 std::string default_locale(uloc_getDefault()); 25 bool locale_is_posix = (default_locale == "en_US_POSIX"); 26 if (locale_is_posix) 27 SetICUDefaultLocale("en_US"); 28 29 size_t index = 0; 30 size_t length = 0; 31 32 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 33 ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length)); 34 EXPECT_EQ(0U, index); 35 EXPECT_EQ(5U, length); 36 37 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( 38 ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"), 39 &index, &length)); 40 41 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 42 ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length)); 43 EXPECT_EQ(4U, index); 44 EXPECT_EQ(6U, length); 45 46 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( 47 ASCIIToUTF16("searching within empty string"), string16(), 48 &index, &length)); 49 50 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 51 string16(), ASCIIToUTF16("searching for empty string"), &index, &length)); 52 EXPECT_EQ(0U, index); 53 EXPECT_EQ(0U, length); 54 55 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 56 ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"), 57 &index, &length)); 58 EXPECT_EQ(0U, index); 59 EXPECT_EQ(18U, length); 60 61 if (locale_is_posix) 62 SetICUDefaultLocale(default_locale.data()); 63 } 64 65 TEST(StringSearchTest, UnicodeLocaleIndependent) { 66 // Base characters 67 const string16 e_base = WideToUTF16(L"e"); 68 const string16 E_base = WideToUTF16(L"E"); 69 const string16 a_base = WideToUTF16(L"a"); 70 71 // Composed characters 72 const string16 e_with_acute_accent = WideToUTF16(L"\u00e9"); 73 const string16 E_with_acute_accent = WideToUTF16(L"\u00c9"); 74 const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); 75 const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); 76 const string16 a_with_acute_accent = WideToUTF16(L"\u00e1"); 77 78 // Decomposed characters 79 const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301"); 80 const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301"); 81 const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); 82 const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); 83 const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301"); 84 85 std::string default_locale(uloc_getDefault()); 86 bool locale_is_posix = (default_locale == "en_US_POSIX"); 87 if (locale_is_posix) 88 SetICUDefaultLocale("en_US"); 89 90 size_t index = 0; 91 size_t length = 0; 92 93 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 94 e_base, e_with_acute_accent, &index, &length)); 95 EXPECT_EQ(0U, index); 96 EXPECT_EQ(e_with_acute_accent.size(), length); 97 98 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 99 e_with_acute_accent, e_base, &index, &length)); 100 EXPECT_EQ(0U, index); 101 EXPECT_EQ(e_base.size(), length); 102 103 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 104 e_base, e_with_acute_combining_mark, &index, &length)); 105 EXPECT_EQ(0U, index); 106 EXPECT_EQ(e_with_acute_combining_mark.size(), length); 107 108 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 109 e_with_acute_combining_mark, e_base, &index, &length)); 110 EXPECT_EQ(0U, index); 111 EXPECT_EQ(e_base.size(), length); 112 113 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 114 e_with_acute_combining_mark, e_with_acute_accent, 115 &index, &length)); 116 EXPECT_EQ(0U, index); 117 EXPECT_EQ(e_with_acute_accent.size(), length); 118 119 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 120 e_with_acute_accent, e_with_acute_combining_mark, 121 &index, &length)); 122 EXPECT_EQ(0U, index); 123 EXPECT_EQ(e_with_acute_combining_mark.size(), length); 124 125 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 126 e_with_acute_combining_mark, e_with_grave_combining_mark, 127 &index, &length)); 128 EXPECT_EQ(0U, index); 129 EXPECT_EQ(e_with_grave_combining_mark.size(), length); 130 131 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 132 e_with_grave_combining_mark, e_with_acute_combining_mark, 133 &index, &length)); 134 EXPECT_EQ(0U, index); 135 EXPECT_EQ(e_with_acute_combining_mark.size(), length); 136 137 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 138 e_with_acute_combining_mark, e_with_grave_accent, &index, &length)); 139 EXPECT_EQ(0U, index); 140 EXPECT_EQ(e_with_grave_accent.size(), length); 141 142 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 143 e_with_grave_accent, e_with_acute_combining_mark, &index, &length)); 144 EXPECT_EQ(0U, index); 145 EXPECT_EQ(e_with_acute_combining_mark.size(), length); 146 147 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 148 E_with_acute_accent, e_with_acute_accent, &index, &length)); 149 EXPECT_EQ(0U, index); 150 EXPECT_EQ(e_with_acute_accent.size(), length); 151 152 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 153 E_with_grave_accent, e_with_acute_accent, &index, &length)); 154 EXPECT_EQ(0U, index); 155 EXPECT_EQ(e_with_acute_accent.size(), length); 156 157 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 158 E_with_acute_combining_mark, e_with_grave_accent, &index, &length)); 159 EXPECT_EQ(0U, index); 160 EXPECT_EQ(e_with_grave_accent.size(), length); 161 162 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 163 E_with_grave_combining_mark, e_with_acute_accent, &index, &length)); 164 EXPECT_EQ(0U, index); 165 EXPECT_EQ(e_with_acute_accent.size(), length); 166 167 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 168 E_base, e_with_grave_accent, &index, &length)); 169 EXPECT_EQ(0U, index); 170 EXPECT_EQ(e_with_grave_accent.size(), length); 171 172 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( 173 a_with_acute_accent, e_with_acute_accent, &index, &length)); 174 175 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( 176 a_with_acute_combining_mark, e_with_acute_combining_mark, 177 &index, &length)); 178 179 if (locale_is_posix) 180 SetICUDefaultLocale(default_locale.data()); 181 } 182 183 TEST(StringSearchTest, UnicodeLocaleDependent) { 184 // Base characters 185 const string16 a_base = WideToUTF16(L"a"); 186 187 // Composed characters 188 const string16 a_with_ring = WideToUTF16(L"\u00e5"); 189 190 EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( 191 a_base, a_with_ring, NULL, NULL)); 192 193 const char* default_locale = uloc_getDefault(); 194 SetICUDefaultLocale("da"); 195 196 EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( 197 a_base, a_with_ring, NULL, NULL)); 198 199 SetICUDefaultLocale(default_locale); 200 } 201 202 TEST(StringSearchTest, FixedPatternMultipleSearch) { 203 std::string default_locale(uloc_getDefault()); 204 bool locale_is_posix = (default_locale == "en_US_POSIX"); 205 if (locale_is_posix) 206 SetICUDefaultLocale("en_US"); 207 208 size_t index = 0; 209 size_t length = 0; 210 211 // Search "hello" over multiple texts. 212 FixedPatternStringSearchIgnoringCaseAndAccents query(ASCIIToUTF16("hello")); 213 EXPECT_TRUE(query.Search(ASCIIToUTF16("12hello34"), &index, &length)); 214 EXPECT_EQ(2U, index); 215 EXPECT_EQ(5U, length); 216 EXPECT_FALSE(query.Search(ASCIIToUTF16("bye"), &index, &length)); 217 EXPECT_TRUE(query.Search(ASCIIToUTF16("hELLo"), &index, &length)); 218 EXPECT_EQ(0U, index); 219 EXPECT_EQ(5U, length); 220 221 if (locale_is_posix) 222 SetICUDefaultLocale(default_locale.data()); 223 } 224 225 } // namespace i18n 226 } // namespace base 227