1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/rtl.h" 6 7 #include <algorithm> 8 9 #include "base/files/file_path.h" 10 #include "base/strings/string_util.h" 11 #include "base/strings/sys_string_conversions.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "testing/gtest/include/gtest/gtest.h" 14 #include "testing/platform_test.h" 15 #include "third_party/icu/source/i18n/unicode/usearch.h" 16 17 namespace base { 18 namespace i18n { 19 20 namespace { 21 22 // A test utility function to set the application default text direction. 23 void SetRTL(bool rtl) { 24 // Override the current locale/direction. 25 SetICUDefaultLocale(rtl ? "he" : "en"); 26 EXPECT_EQ(rtl, IsRTL()); 27 } 28 29 } // namespace 30 31 class RTLTest : public PlatformTest { 32 }; 33 34 TEST_F(RTLTest, GetFirstStrongCharacterDirection) { 35 struct { 36 const wchar_t* text; 37 TextDirection direction; 38 } cases[] = { 39 // Test pure LTR string. 40 { L"foo bar", LEFT_TO_RIGHT }, 41 // Test pure RTL string. 42 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT}, 43 // Test bidi string in which the first character with strong directionality 44 // is a character with type L. 45 { L"foo \x05d0 bar", LEFT_TO_RIGHT }, 46 // Test bidi string in which the first character with strong directionality 47 // is a character with type R. 48 { L"\x05d0 foo bar", RIGHT_TO_LEFT }, 49 // Test bidi string which starts with a character with weak directionality 50 // and in which the first character with strong directionality is a 51 // character with type L. 52 { L"!foo \x05d0 bar", LEFT_TO_RIGHT }, 53 // Test bidi string which starts with a character with weak directionality 54 // and in which the first character with strong directionality is a 55 // character with type R. 56 { L",\x05d0 foo bar", RIGHT_TO_LEFT }, 57 // Test bidi string in which the first character with strong directionality 58 // is a character with type LRE. 59 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT }, 60 // Test bidi string in which the first character with strong directionality 61 // is a character with type LRO. 62 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT }, 63 // Test bidi string in which the first character with strong directionality 64 // is a character with type RLE. 65 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT }, 66 // Test bidi string in which the first character with strong directionality 67 // is a character with type RLO. 68 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT }, 69 // Test bidi string in which the first character with strong directionality 70 // is a character with type AL. 71 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT }, 72 // Test a string without strong directionality characters. 73 { L",!.{}", LEFT_TO_RIGHT }, 74 // Test empty string. 75 { L"", LEFT_TO_RIGHT }, 76 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to 77 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more 78 // information). 79 { 80 #if defined(WCHAR_T_IS_UTF32) 81 L" ! \x10910" L"abc 123", 82 #elif defined(WCHAR_T_IS_UTF16) 83 L" ! \xd802\xdd10" L"abc 123", 84 #else 85 #error wchar_t should be either UTF-16 or UTF-32 86 #endif 87 RIGHT_TO_LEFT }, 88 { 89 #if defined(WCHAR_T_IS_UTF32) 90 L" ! \x10401" L"abc 123", 91 #elif defined(WCHAR_T_IS_UTF16) 92 L" ! \xd801\xdc01" L"abc 123", 93 #else 94 #error wchar_t should be either UTF-16 or UTF-32 95 #endif 96 LEFT_TO_RIGHT }, 97 }; 98 99 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 100 EXPECT_EQ(cases[i].direction, 101 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text))); 102 } 103 104 105 // Note that the cases with LRE, LRO, RLE and RLO are invalid for 106 // GetLastStrongCharacterDirection because they should be followed by PDF 107 // character. 108 TEST_F(RTLTest, GetLastStrongCharacterDirection) { 109 struct { 110 const wchar_t* text; 111 TextDirection direction; 112 } cases[] = { 113 // Test pure LTR string. 114 { L"foo bar", LEFT_TO_RIGHT }, 115 // Test pure RTL string. 116 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT}, 117 // Test bidi string in which the last character with strong directionality 118 // is a character with type L. 119 { L"foo \x05d0 bar", LEFT_TO_RIGHT }, 120 // Test bidi string in which the last character with strong directionality 121 // is a character with type R. 122 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT }, 123 // Test bidi string which ends with a character with weak directionality 124 // and in which the last character with strong directionality is a 125 // character with type L. 126 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT }, 127 // Test bidi string which ends with a character with weak directionality 128 // and in which the last character with strong directionality is a 129 // character with type R. 130 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT }, 131 // Test bidi string in which the last character with strong directionality 132 // is a character with type AL. 133 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT }, 134 // Test a string without strong directionality characters. 135 { L",!.{}", LEFT_TO_RIGHT }, 136 // Test empty string. 137 { L"", LEFT_TO_RIGHT }, 138 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to 139 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more 140 // information). 141 { 142 #if defined(WCHAR_T_IS_UTF32) 143 L"abc 123" L" ! \x10910 !", 144 #elif defined(WCHAR_T_IS_UTF16) 145 L"abc 123" L" ! \xd802\xdd10 !", 146 #else 147 #error wchar_t should be either UTF-16 or UTF-32 148 #endif 149 RIGHT_TO_LEFT }, 150 { 151 #if defined(WCHAR_T_IS_UTF32) 152 L"abc 123" L" ! \x10401 !", 153 #elif defined(WCHAR_T_IS_UTF16) 154 L"abc 123" L" ! \xd801\xdc01 !", 155 #else 156 #error wchar_t should be either UTF-16 or UTF-32 157 #endif 158 LEFT_TO_RIGHT }, 159 }; 160 161 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 162 EXPECT_EQ(cases[i].direction, 163 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text))); 164 } 165 166 TEST_F(RTLTest, GetStringDirection) { 167 struct { 168 const wchar_t* text; 169 TextDirection direction; 170 } cases[] = { 171 // Test pure LTR string. 172 { L"foobar", LEFT_TO_RIGHT }, 173 { L".foobar", LEFT_TO_RIGHT }, 174 { L"foo, bar", LEFT_TO_RIGHT }, 175 // Test pure LTR with strong directionality characters of type LRE. 176 { L"\x202a\x202a", LEFT_TO_RIGHT }, 177 { L".\x202a\x202a", LEFT_TO_RIGHT }, 178 { L"\x202a, \x202a", LEFT_TO_RIGHT }, 179 // Test pure LTR with strong directionality characters of type LRO. 180 { L"\x202d\x202d", LEFT_TO_RIGHT }, 181 { L".\x202d\x202d", LEFT_TO_RIGHT }, 182 { L"\x202d, \x202d", LEFT_TO_RIGHT }, 183 // Test pure LTR with various types of strong directionality characters. 184 { L"foo \x202a\x202d", LEFT_TO_RIGHT }, 185 { L".\x202d foo \x202a", LEFT_TO_RIGHT }, 186 { L"\x202a, \x202d foo", LEFT_TO_RIGHT }, 187 // Test pure RTL with strong directionality characters of type R. 188 { L"\x05d0\x05d0", RIGHT_TO_LEFT }, 189 { L".\x05d0\x05d0", RIGHT_TO_LEFT }, 190 { L"\x05d0, \x05d0", RIGHT_TO_LEFT }, 191 // Test pure RTL with strong directionality characters of type RLE. 192 { L"\x202b\x202b", RIGHT_TO_LEFT }, 193 { L".\x202b\x202b", RIGHT_TO_LEFT }, 194 { L"\x202b, \x202b", RIGHT_TO_LEFT }, 195 // Test pure RTL with strong directionality characters of type RLO. 196 { L"\x202e\x202e", RIGHT_TO_LEFT }, 197 { L".\x202e\x202e", RIGHT_TO_LEFT }, 198 { L"\x202e, \x202e", RIGHT_TO_LEFT }, 199 // Test pure RTL with strong directionality characters of type AL. 200 { L"\x0622\x0622", RIGHT_TO_LEFT }, 201 { L".\x0622\x0622", RIGHT_TO_LEFT }, 202 { L"\x0622, \x0622", RIGHT_TO_LEFT }, 203 // Test pure RTL with various types of strong directionality characters. 204 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT }, 205 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT }, 206 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT }, 207 // Test bidi strings. 208 { L"foo \x05d0 bar", UNKNOWN_DIRECTION }, 209 { L"\x202b foo bar", UNKNOWN_DIRECTION }, 210 { L"!foo \x0622 bar", UNKNOWN_DIRECTION }, 211 { L"\x202a\x202b", UNKNOWN_DIRECTION }, 212 { L"\x202e\x202d", UNKNOWN_DIRECTION }, 213 { L"\x0622\x202a", UNKNOWN_DIRECTION }, 214 { L"\x202d\x05d0", UNKNOWN_DIRECTION }, 215 // Test a string without strong directionality characters. 216 { L",!.{}", LEFT_TO_RIGHT }, 217 // Test empty string. 218 { L"", LEFT_TO_RIGHT }, 219 { 220 #if defined(WCHAR_T_IS_UTF32) 221 L" ! \x10910" L"abc 123", 222 #elif defined(WCHAR_T_IS_UTF16) 223 L" ! \xd802\xdd10" L"abc 123", 224 #else 225 #error wchar_t should be either UTF-16 or UTF-32 226 #endif 227 UNKNOWN_DIRECTION }, 228 { 229 #if defined(WCHAR_T_IS_UTF32) 230 L" ! \x10401" L"abc 123", 231 #elif defined(WCHAR_T_IS_UTF16) 232 L" ! \xd801\xdc01" L"abc 123", 233 #else 234 #error wchar_t should be either UTF-16 or UTF-32 235 #endif 236 LEFT_TO_RIGHT }, 237 }; 238 239 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 240 EXPECT_EQ(cases[i].direction, 241 GetStringDirection(WideToUTF16(cases[i].text))); 242 } 243 244 TEST_F(RTLTest, WrapPathWithLTRFormatting) { 245 const wchar_t* cases[] = { 246 // Test common path, such as "c:\foo\bar". 247 L"c:/foo/bar", 248 // Test path with file name, such as "c:\foo\bar\test.jpg". 249 L"c:/foo/bar/test.jpg", 250 // Test path ending with punctuation, such as "c:\(foo)\bar.". 251 L"c:/(foo)/bar.", 252 // Test path ending with separator, such as "c:\foo\bar\". 253 L"c:/foo/bar/", 254 // Test path with RTL character. 255 L"c:/\x05d0", 256 // Test path with 2 level RTL directory names. 257 L"c:/\x05d0/\x0622", 258 // Test path with mixed RTL/LTR directory names and ending with punctuation. 259 L"c:/\x05d0/\x0622/(foo)/b.a.r.", 260 // Test path without driver name, such as "/foo/bar/test/jpg". 261 L"/foo/bar/test.jpg", 262 // Test path start with current directory, such as "./foo". 263 L"./foo", 264 // Test path start with parent directory, such as "../foo/bar.jpg". 265 L"../foo/bar.jpg", 266 // Test absolute path, such as "//foo/bar.jpg". 267 L"//foo/bar.jpg", 268 // Test path with mixed RTL/LTR directory names. 269 L"c:/foo/\x05d0/\x0622/\x05d1.jpg", 270 // Test empty path. 271 L"" 272 }; 273 274 for (size_t i = 0; i < arraysize(cases); ++i) { 275 FilePath path; 276 #if defined(OS_WIN) 277 std::wstring win_path(cases[i]); 278 std::replace(win_path.begin(), win_path.end(), '/', '\\'); 279 path = FilePath(win_path); 280 std::wstring wrapped_expected = 281 std::wstring(L"\x202a") + win_path + L"\x202c"; 282 #else 283 path = FilePath(base::SysWideToNativeMB(cases[i])); 284 std::wstring wrapped_expected = 285 std::wstring(L"\x202a") + cases[i] + L"\x202c"; 286 #endif 287 string16 localized_file_path_string; 288 WrapPathWithLTRFormatting(path, &localized_file_path_string); 289 290 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string); 291 EXPECT_EQ(wrapped_expected, wrapped_actual); 292 } 293 } 294 295 TEST_F(RTLTest, WrapString) { 296 const wchar_t* cases[] = { 297 L" . ", 298 L"abc", 299 L"a" L"\x5d0\x5d1", 300 L"a" L"\x5d1" L"b", 301 L"\x5d0\x5d1\x5d2", 302 L"\x5d0\x5d1" L"a", 303 L"\x5d0" L"a" L"\x5d1", 304 }; 305 306 const bool was_rtl = IsRTL(); 307 308 for (size_t i = 0; i < 2; ++i) { 309 // Toggle the application default text direction (to try each direction). 310 SetRTL(!IsRTL()); 311 312 string16 empty; 313 WrapStringWithLTRFormatting(&empty); 314 EXPECT_TRUE(empty.empty()); 315 WrapStringWithRTLFormatting(&empty); 316 EXPECT_TRUE(empty.empty()); 317 318 for (size_t i = 0; i < arraysize(cases); ++i) { 319 string16 input = WideToUTF16(cases[i]); 320 string16 ltr_wrap = input; 321 WrapStringWithLTRFormatting(<r_wrap); 322 EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark); 323 EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input); 324 EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting); 325 326 string16 rtl_wrap = input; 327 WrapStringWithRTLFormatting(&rtl_wrap); 328 EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark); 329 EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input); 330 EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting); 331 } 332 } 333 334 EXPECT_EQ(was_rtl, IsRTL()); 335 } 336 337 TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) { 338 struct { 339 const wchar_t* path; 340 bool wrap_ltr; 341 bool wrap_rtl; 342 } cases[] = { 343 { L"test", false, true }, 344 { L"test.html", false, true }, 345 { L"\x05d0\x05d1\x05d2", true, true }, 346 { L"\x05d0\x05d1\x05d2.txt", true, true }, 347 { L"\x05d0" L"abc", true, true }, 348 { L"\x05d0" L"abc.txt", true, true }, 349 { L"abc\x05d0\x05d1", false, true }, 350 { L"abc\x05d0\x05d1.jpg", false, true }, 351 }; 352 353 const bool was_rtl = IsRTL(); 354 355 for (size_t i = 0; i < 2; ++i) { 356 // Toggle the application default text direction (to try each direction). 357 SetRTL(!IsRTL()); 358 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 359 string16 input = WideToUTF16(cases[i].path); 360 string16 output = GetDisplayStringInLTRDirectionality(input); 361 // Test the expected wrapping behavior for the current UI directionality. 362 if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr) 363 EXPECT_NE(output, input); 364 else 365 EXPECT_EQ(output, input); 366 } 367 } 368 369 EXPECT_EQ(was_rtl, IsRTL()); 370 } 371 372 TEST_F(RTLTest, GetTextDirection) { 373 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar")); 374 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG")); 375 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he")); 376 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL")); 377 // iw is an obsolete code for Hebrew. 378 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw")); 379 // Although we're not yet localized to Farsi and Urdu, we 380 // do have the text layout direction information for them. 381 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa")); 382 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur")); 383 #if 0 384 // Enable these when we include the minimal locale data for Azerbaijani 385 // written in Arabic and Dhivehi. At the moment, our copy of 386 // ICU data does not have entries for them. 387 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab")); 388 // Dhivehi that uses Thaana script. 389 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv")); 390 #endif 391 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en")); 392 // Chinese in China with '-'. 393 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN")); 394 // Filipino : 3-letter code 395 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil")); 396 // Russian 397 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru")); 398 // Japanese that uses multiple scripts 399 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja")); 400 } 401 402 TEST_F(RTLTest, UnadjustStringForLocaleDirection) { 403 // These test strings are borrowed from WrapPathWithLTRFormatting 404 const wchar_t* cases[] = { 405 L"foo bar", 406 L"foo \x05d0 bar", 407 L"\x05d0 foo bar", 408 L"!foo \x05d0 bar", 409 L",\x05d0 foo bar", 410 L"\x202a \x05d0 foo bar", 411 L"\x202d \x05d0 foo bar", 412 L"\x202b foo \x05d0 bar", 413 L"\x202e foo \x05d0 bar", 414 L"\x0622 foo \x05d0 bar", 415 }; 416 417 const bool was_rtl = IsRTL(); 418 419 for (size_t i = 0; i < 2; ++i) { 420 // Toggle the application default text direction (to try each direction). 421 SetRTL(!IsRTL()); 422 423 for (size_t i = 0; i < arraysize(cases); ++i) { 424 string16 test_case = WideToUTF16(cases[i]); 425 string16 adjusted_string = test_case; 426 427 if (!AdjustStringForLocaleDirection(&adjusted_string)) 428 continue; 429 430 EXPECT_NE(test_case, adjusted_string); 431 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string)); 432 EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case 433 << "] with IsRTL() == " << IsRTL(); 434 } 435 } 436 437 EXPECT_EQ(was_rtl, IsRTL()); 438 } 439 440 } // namespace i18n 441 } // namespace base 442