Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/i18n/rtl.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/files/file_path.h"
     10 #include "base/strings/string_util.h"
     11 #include "base/strings/sys_string_conversions.h"
     12 #include "base/strings/utf_string_conversions.h"
     13 #include "testing/gtest/include/gtest/gtest.h"
     14 #include "testing/platform_test.h"
     15 #include "third_party/icu/source/i18n/unicode/usearch.h"
     16 
     17 namespace base {
     18 namespace i18n {
     19 
     20 namespace {
     21 
     22 // A test utility function to set the application default text direction.
     23 void SetRTL(bool rtl) {
     24   // Override the current locale/direction.
     25   SetICUDefaultLocale(rtl ? "he" : "en");
     26   EXPECT_EQ(rtl, IsRTL());
     27 }
     28 
     29 }  // namespace
     30 
     31 class RTLTest : public PlatformTest {
     32 };
     33 
     34 TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
     35   struct {
     36     const wchar_t* text;
     37     TextDirection direction;
     38   } cases[] = {
     39     // Test pure LTR string.
     40     { L"foo bar", LEFT_TO_RIGHT },
     41     // Test pure RTL string.
     42     { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
     43     // Test bidi string in which the first character with strong directionality
     44     // is a character with type L.
     45     { L"foo \x05d0 bar", LEFT_TO_RIGHT },
     46     // Test bidi string in which the first character with strong directionality
     47     // is a character with type R.
     48     { L"\x05d0 foo bar", RIGHT_TO_LEFT },
     49     // Test bidi string which starts with a character with weak directionality
     50     // and in which the first character with strong directionality is a
     51     // character with type L.
     52     { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
     53     // Test bidi string which starts with a character with weak directionality
     54     // and in which the first character with strong directionality is a
     55     // character with type R.
     56     { L",\x05d0 foo bar", RIGHT_TO_LEFT },
     57     // Test bidi string in which the first character with strong directionality
     58     // is a character with type LRE.
     59     { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
     60     // Test bidi string in which the first character with strong directionality
     61     // is a character with type LRO.
     62     { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
     63     // Test bidi string in which the first character with strong directionality
     64     // is a character with type RLE.
     65     { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
     66     // Test bidi string in which the first character with strong directionality
     67     // is a character with type RLO.
     68     { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
     69     // Test bidi string in which the first character with strong directionality
     70     // is a character with type AL.
     71     { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
     72     // Test a string without strong directionality characters.
     73     { L",!.{}", LEFT_TO_RIGHT },
     74     // Test empty string.
     75     { L"", LEFT_TO_RIGHT },
     76     // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
     77     // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
     78     // information).
     79     {
     80 #if defined(WCHAR_T_IS_UTF32)
     81       L" ! \x10910" L"abc 123",
     82 #elif defined(WCHAR_T_IS_UTF16)
     83       L" ! \xd802\xdd10" L"abc 123",
     84 #else
     85 #error wchar_t should be either UTF-16 or UTF-32
     86 #endif
     87       RIGHT_TO_LEFT },
     88     {
     89 #if defined(WCHAR_T_IS_UTF32)
     90       L" ! \x10401" L"abc 123",
     91 #elif defined(WCHAR_T_IS_UTF16)
     92       L" ! \xd801\xdc01" L"abc 123",
     93 #else
     94 #error wchar_t should be either UTF-16 or UTF-32
     95 #endif
     96       LEFT_TO_RIGHT },
     97    };
     98 
     99   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    100     EXPECT_EQ(cases[i].direction,
    101               GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
    102 }
    103 
    104 
    105 // Note that the cases with LRE, LRO, RLE and RLO are invalid for
    106 // GetLastStrongCharacterDirection because they should be followed by PDF
    107 // character.
    108 TEST_F(RTLTest, GetLastStrongCharacterDirection) {
    109   struct {
    110     const wchar_t* text;
    111     TextDirection direction;
    112   } cases[] = {
    113     // Test pure LTR string.
    114     { L"foo bar", LEFT_TO_RIGHT },
    115     // Test pure RTL string.
    116     { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
    117     // Test bidi string in which the last character with strong directionality
    118     // is a character with type L.
    119     { L"foo \x05d0 bar", LEFT_TO_RIGHT },
    120     // Test bidi string in which the last character with strong directionality
    121     // is a character with type R.
    122     { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
    123     // Test bidi string which ends with a character with weak directionality
    124     // and in which the last character with strong directionality is a
    125     // character with type L.
    126     { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
    127     // Test bidi string which ends with a character with weak directionality
    128     // and in which the last character with strong directionality is a
    129     // character with type R.
    130     { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
    131     // Test bidi string in which the last character with strong directionality
    132     // is a character with type AL.
    133     { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
    134     // Test a string without strong directionality characters.
    135     { L",!.{}", LEFT_TO_RIGHT },
    136     // Test empty string.
    137     { L"", LEFT_TO_RIGHT },
    138     // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
    139     // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
    140     // information).
    141     {
    142 #if defined(WCHAR_T_IS_UTF32)
    143        L"abc 123" L" ! \x10910 !",
    144 #elif defined(WCHAR_T_IS_UTF16)
    145        L"abc 123" L" ! \xd802\xdd10 !",
    146 #else
    147 #error wchar_t should be either UTF-16 or UTF-32
    148 #endif
    149       RIGHT_TO_LEFT },
    150     {
    151 #if defined(WCHAR_T_IS_UTF32)
    152        L"abc 123" L" ! \x10401 !",
    153 #elif defined(WCHAR_T_IS_UTF16)
    154        L"abc 123" L" ! \xd801\xdc01 !",
    155 #else
    156 #error wchar_t should be either UTF-16 or UTF-32
    157 #endif
    158       LEFT_TO_RIGHT },
    159    };
    160 
    161   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    162     EXPECT_EQ(cases[i].direction,
    163               GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
    164 }
    165 
    166 TEST_F(RTLTest, GetStringDirection) {
    167   struct {
    168     const wchar_t* text;
    169     TextDirection direction;
    170   } cases[] = {
    171     // Test pure LTR string.
    172     { L"foobar", LEFT_TO_RIGHT },
    173     { L".foobar", LEFT_TO_RIGHT },
    174     { L"foo, bar", LEFT_TO_RIGHT },
    175     // Test pure LTR with strong directionality characters of type LRE.
    176     { L"\x202a\x202a", LEFT_TO_RIGHT },
    177     { L".\x202a\x202a", LEFT_TO_RIGHT },
    178     { L"\x202a, \x202a", LEFT_TO_RIGHT },
    179     // Test pure LTR with strong directionality characters of type LRO.
    180     { L"\x202d\x202d", LEFT_TO_RIGHT },
    181     { L".\x202d\x202d", LEFT_TO_RIGHT },
    182     { L"\x202d, \x202d", LEFT_TO_RIGHT },
    183     // Test pure LTR with various types of strong directionality characters.
    184     { L"foo \x202a\x202d", LEFT_TO_RIGHT },
    185     { L".\x202d foo \x202a", LEFT_TO_RIGHT },
    186     { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
    187     // Test pure RTL with strong directionality characters of type R.
    188     { L"\x05d0\x05d0", RIGHT_TO_LEFT },
    189     { L".\x05d0\x05d0", RIGHT_TO_LEFT },
    190     { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
    191     // Test pure RTL with strong directionality characters of type RLE.
    192     { L"\x202b\x202b", RIGHT_TO_LEFT },
    193     { L".\x202b\x202b", RIGHT_TO_LEFT },
    194     { L"\x202b, \x202b", RIGHT_TO_LEFT },
    195     // Test pure RTL with strong directionality characters of type RLO.
    196     { L"\x202e\x202e", RIGHT_TO_LEFT },
    197     { L".\x202e\x202e", RIGHT_TO_LEFT },
    198     { L"\x202e, \x202e", RIGHT_TO_LEFT },
    199     // Test pure RTL with strong directionality characters of type AL.
    200     { L"\x0622\x0622", RIGHT_TO_LEFT },
    201     { L".\x0622\x0622", RIGHT_TO_LEFT },
    202     { L"\x0622, \x0622", RIGHT_TO_LEFT },
    203     // Test pure RTL with various types of strong directionality characters.
    204     { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
    205     { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
    206     { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
    207     // Test bidi strings.
    208     { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
    209     { L"\x202b foo bar", UNKNOWN_DIRECTION },
    210     { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
    211     { L"\x202a\x202b", UNKNOWN_DIRECTION },
    212     { L"\x202e\x202d", UNKNOWN_DIRECTION },
    213     { L"\x0622\x202a", UNKNOWN_DIRECTION },
    214     { L"\x202d\x05d0", UNKNOWN_DIRECTION },
    215     // Test a string without strong directionality characters.
    216     { L",!.{}", LEFT_TO_RIGHT },
    217     // Test empty string.
    218     { L"", LEFT_TO_RIGHT },
    219     {
    220 #if defined(WCHAR_T_IS_UTF32)
    221       L" ! \x10910" L"abc 123",
    222 #elif defined(WCHAR_T_IS_UTF16)
    223       L" ! \xd802\xdd10" L"abc 123",
    224 #else
    225 #error wchar_t should be either UTF-16 or UTF-32
    226 #endif
    227       UNKNOWN_DIRECTION },
    228     {
    229 #if defined(WCHAR_T_IS_UTF32)
    230       L" ! \x10401" L"abc 123",
    231 #elif defined(WCHAR_T_IS_UTF16)
    232       L" ! \xd801\xdc01" L"abc 123",
    233 #else
    234 #error wchar_t should be either UTF-16 or UTF-32
    235 #endif
    236       LEFT_TO_RIGHT },
    237    };
    238 
    239   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    240     EXPECT_EQ(cases[i].direction,
    241               GetStringDirection(WideToUTF16(cases[i].text)));
    242 }
    243 
    244 TEST_F(RTLTest, WrapPathWithLTRFormatting) {
    245   const wchar_t* cases[] = {
    246     // Test common path, such as "c:\foo\bar".
    247     L"c:/foo/bar",
    248     // Test path with file name, such as "c:\foo\bar\test.jpg".
    249     L"c:/foo/bar/test.jpg",
    250     // Test path ending with punctuation, such as "c:\(foo)\bar.".
    251     L"c:/(foo)/bar.",
    252     // Test path ending with separator, such as "c:\foo\bar\".
    253     L"c:/foo/bar/",
    254     // Test path with RTL character.
    255     L"c:/\x05d0",
    256     // Test path with 2 level RTL directory names.
    257     L"c:/\x05d0/\x0622",
    258     // Test path with mixed RTL/LTR directory names and ending with punctuation.
    259     L"c:/\x05d0/\x0622/(foo)/b.a.r.",
    260     // Test path without driver name, such as "/foo/bar/test/jpg".
    261     L"/foo/bar/test.jpg",
    262     // Test path start with current directory, such as "./foo".
    263     L"./foo",
    264     // Test path start with parent directory, such as "../foo/bar.jpg".
    265     L"../foo/bar.jpg",
    266     // Test absolute path, such as "//foo/bar.jpg".
    267     L"//foo/bar.jpg",
    268     // Test path with mixed RTL/LTR directory names.
    269     L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
    270     // Test empty path.
    271     L""
    272   };
    273 
    274   for (size_t i = 0; i < arraysize(cases); ++i) {
    275     FilePath path;
    276 #if defined(OS_WIN)
    277     std::wstring win_path(cases[i]);
    278     std::replace(win_path.begin(), win_path.end(), '/', '\\');
    279     path = FilePath(win_path);
    280     std::wstring wrapped_expected =
    281         std::wstring(L"\x202a") + win_path + L"\x202c";
    282 #else
    283     path = FilePath(base::SysWideToNativeMB(cases[i]));
    284     std::wstring wrapped_expected =
    285         std::wstring(L"\x202a") + cases[i] + L"\x202c";
    286 #endif
    287     string16 localized_file_path_string;
    288     WrapPathWithLTRFormatting(path, &localized_file_path_string);
    289 
    290     std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
    291     EXPECT_EQ(wrapped_expected, wrapped_actual);
    292   }
    293 }
    294 
    295 TEST_F(RTLTest, WrapString) {
    296   const wchar_t* cases[] = {
    297     L" . ",
    298     L"abc",
    299     L"a" L"\x5d0\x5d1",
    300     L"a" L"\x5d1" L"b",
    301     L"\x5d0\x5d1\x5d2",
    302     L"\x5d0\x5d1" L"a",
    303     L"\x5d0" L"a" L"\x5d1",
    304   };
    305 
    306   const bool was_rtl = IsRTL();
    307 
    308   for (size_t i = 0; i < 2; ++i) {
    309     // Toggle the application default text direction (to try each direction).
    310     SetRTL(!IsRTL());
    311 
    312     string16 empty;
    313     WrapStringWithLTRFormatting(&empty);
    314     EXPECT_TRUE(empty.empty());
    315     WrapStringWithRTLFormatting(&empty);
    316     EXPECT_TRUE(empty.empty());
    317 
    318     for (size_t i = 0; i < arraysize(cases); ++i) {
    319       string16 input = WideToUTF16(cases[i]);
    320       string16 ltr_wrap = input;
    321       WrapStringWithLTRFormatting(&ltr_wrap);
    322       EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
    323       EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
    324       EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
    325 
    326       string16 rtl_wrap = input;
    327       WrapStringWithRTLFormatting(&rtl_wrap);
    328       EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
    329       EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
    330       EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
    331     }
    332   }
    333 
    334   EXPECT_EQ(was_rtl, IsRTL());
    335 }
    336 
    337 TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
    338   struct {
    339     const wchar_t* path;
    340     bool wrap_ltr;
    341     bool wrap_rtl;
    342   } cases[] = {
    343     { L"test",                   false, true },
    344     { L"test.html",              false, true },
    345     { L"\x05d0\x05d1\x05d2",     true,  true },
    346     { L"\x05d0\x05d1\x05d2.txt", true,  true },
    347     { L"\x05d0" L"abc",          true,  true },
    348     { L"\x05d0" L"abc.txt",      true,  true },
    349     { L"abc\x05d0\x05d1",        false, true },
    350     { L"abc\x05d0\x05d1.jpg",    false, true },
    351   };
    352 
    353   const bool was_rtl = IsRTL();
    354 
    355   for (size_t i = 0; i < 2; ++i) {
    356     // Toggle the application default text direction (to try each direction).
    357     SetRTL(!IsRTL());
    358     for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    359       string16 input = WideToUTF16(cases[i].path);
    360       string16 output = GetDisplayStringInLTRDirectionality(input);
    361       // Test the expected wrapping behavior for the current UI directionality.
    362       if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
    363         EXPECT_NE(output, input);
    364       else
    365         EXPECT_EQ(output, input);
    366     }
    367   }
    368 
    369   EXPECT_EQ(was_rtl, IsRTL());
    370 }
    371 
    372 TEST_F(RTLTest, GetTextDirection) {
    373   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
    374   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
    375   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
    376   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
    377   // iw is an obsolete code for Hebrew.
    378   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
    379   // Although we're not yet localized to Farsi and Urdu, we
    380   // do have the text layout direction information for them.
    381   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
    382   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
    383 #if 0
    384   // Enable these when we include the minimal locale data for Azerbaijani
    385   // written in Arabic and Dhivehi. At the moment, our copy of
    386   // ICU data does not have entries for them.
    387   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
    388   // Dhivehi that uses Thaana script.
    389   EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
    390 #endif
    391   EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
    392   // Chinese in China with '-'.
    393   EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
    394   // Filipino : 3-letter code
    395   EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
    396   // Russian
    397   EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
    398   // Japanese that uses multiple scripts
    399   EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
    400 }
    401 
    402 TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
    403   // These test strings are borrowed from WrapPathWithLTRFormatting
    404   const wchar_t* cases[] = {
    405     L"foo bar",
    406     L"foo \x05d0 bar",
    407     L"\x05d0 foo bar",
    408     L"!foo \x05d0 bar",
    409     L",\x05d0 foo bar",
    410     L"\x202a \x05d0 foo  bar",
    411     L"\x202d \x05d0 foo  bar",
    412     L"\x202b foo \x05d0 bar",
    413     L"\x202e foo \x05d0 bar",
    414     L"\x0622 foo \x05d0 bar",
    415   };
    416 
    417   const bool was_rtl = IsRTL();
    418 
    419   for (size_t i = 0; i < 2; ++i) {
    420     // Toggle the application default text direction (to try each direction).
    421     SetRTL(!IsRTL());
    422 
    423     for (size_t i = 0; i < arraysize(cases); ++i) {
    424       string16 test_case = WideToUTF16(cases[i]);
    425       string16 adjusted_string = test_case;
    426 
    427       if (!AdjustStringForLocaleDirection(&adjusted_string))
    428         continue;
    429 
    430       EXPECT_NE(test_case, adjusted_string);
    431       EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
    432       EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
    433                                             << "] with IsRTL() == " << IsRTL();
    434     }
    435   }
    436 
    437   EXPECT_EQ(was_rtl, IsRTL());
    438 }
    439 
    440 }  // namespace i18n
    441 }  // namespace base
    442