Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/i18n/rtl.h"
      6 
      7 #include "base/file_path.h"
      8 #include "base/logging.h"
      9 #include "base/string_util.h"
     10 #include "base/utf_string_conversions.h"
     11 #include "base/sys_string_conversions.h"
     12 #include "unicode/coll.h"
     13 #include "unicode/locid.h"
     14 #include "unicode/uchar.h"
     15 #include "unicode/uscript.h"
     16 
     17 #if defined(TOOLKIT_USES_GTK)
     18 #include <gtk/gtk.h>
     19 #endif
     20 
     21 namespace {
     22 
     23 // Extract language and country, ignore keywords, concatenate using dash.
     24 std::string GetLocaleString(const icu::Locale& locale) {
     25   const char* language = locale.getLanguage();
     26   const char* country = locale.getCountry();
     27 
     28   std::string result =
     29       (language != NULL && *language != '\0') ? language : "und";
     30 
     31   if (country != NULL && *country != '\0') {
     32     result += '-';
     33     result += country;
     34   }
     35 
     36   return result;
     37 }
     38 
     39 }  // namespace
     40 
     41 namespace base {
     42 namespace i18n {
     43 
// Cached text direction of ICU's default locale. UNKNOWN_DIRECTION means the
// direction has not been computed yet: ICUIsRTL() fills it in on demand and
// SetICUDefaultLocale() resets it whenever the default locale changes.
static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
     46 
     47 // Convert the ICU default locale to a string.
     48 std::string GetConfiguredLocale() {
     49   return GetLocaleString(icu::Locale::getDefault());
     50 }
     51 
     52 // Convert the ICU canonicalized locale to a string.
     53 std::string GetCanonicalLocale(const char* locale) {
     54   return GetLocaleString(icu::Locale::createCanonical(locale));
     55 }
     56 
     57 // Convert Chrome locale name to ICU locale name
     58 std::string ICULocaleName(const std::string& locale_string) {
     59   // If not Spanish, just return it.
     60   if (locale_string.substr(0, 2) != "es")
     61     return locale_string;
     62   // Expand es to es-ES.
     63   if (LowerCaseEqualsASCII(locale_string, "es"))
     64     return "es-ES";
     65   // Map es-419 (Latin American Spanish) to es-FOO depending on the system
     66   // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
     67   // to es-MX (the most populous in Spanish-speaking Latin America).
     68   if (LowerCaseEqualsASCII(locale_string, "es-419")) {
     69     const icu::Locale& locale = icu::Locale::getDefault();
     70     std::string language = locale.getLanguage();
     71     const char* country = locale.getCountry();
     72     if (LowerCaseEqualsASCII(language, "es") &&
     73       !LowerCaseEqualsASCII(country, "es")) {
     74         language += '-';
     75         language += country;
     76         return language;
     77     }
     78     return "es-MX";
     79   }
     80   // Currently, Chrome has only "es" and "es-419", but later we may have
     81   // more specific "es-RR".
     82   return locale_string;
     83 }
     84 
     85 void SetICUDefaultLocale(const std::string& locale_string) {
     86   icu::Locale locale(ICULocaleName(locale_string).c_str());
     87   UErrorCode error_code = U_ZERO_ERROR;
     88   icu::Locale::setDefault(locale, error_code);
     89   // This return value is actually bogus because Locale object is
     90   // an ID and setDefault seems to always succeed (regardless of the
     91   // presence of actual locale data). However,
     92   // it does not hurt to have it as a sanity check.
     93   DCHECK(U_SUCCESS(error_code));
     94   g_icu_text_direction = UNKNOWN_DIRECTION;
     95 
     96   // If we use Views toolkit on top of GtkWidget, then we need to keep
     97   // GtkWidget's default text direction consistent with ICU's text direction.
     98   // Because in this case ICU's text direction will be used instead.
     99   // See IsRTL() function below.
    100 #if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
    101   gtk_widget_set_default_direction(
    102       ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
    103 #endif
    104 }
    105 
    106 bool IsRTL() {
    107 #if defined(TOOLKIT_GTK)
    108   GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
    109   return (gtk_dir == GTK_TEXT_DIR_RTL);
    110 #else
    111   return ICUIsRTL();
    112 #endif
    113 }
    114 
    115 bool ICUIsRTL() {
    116   if (g_icu_text_direction == UNKNOWN_DIRECTION) {
    117     const icu::Locale& locale = icu::Locale::getDefault();
    118     g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
    119   }
    120   return g_icu_text_direction == RIGHT_TO_LEFT;
    121 }
    122 
    123 TextDirection GetTextDirectionForLocale(const char* locale_name) {
    124   UErrorCode status = U_ZERO_ERROR;
    125   ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
    126   DCHECK(U_SUCCESS(status));
    127   // Treat anything other than RTL as LTR.
    128   return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
    129 }
    130 
    131 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
    132   const UChar* string = text.c_str();
    133   size_t length = text.length();
    134   size_t position = 0;
    135   while (position < length) {
    136     UChar32 character;
    137     size_t next_position = position;
    138     U16_NEXT(string, next_position, length, character);
    139 
    140     // Now that we have the character, we use ICU in order to query for the
    141     // appropriate Unicode BiDi character type.
    142     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
    143     if ((property == U_RIGHT_TO_LEFT) ||
    144         (property == U_RIGHT_TO_LEFT_ARABIC) ||
    145         (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
    146         (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
    147       return RIGHT_TO_LEFT;
    148     } else if ((property == U_LEFT_TO_RIGHT) ||
    149                (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
    150                (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
    151       return LEFT_TO_RIGHT;
    152     }
    153 
    154     position = next_position;
    155   }
    156 
    157   return LEFT_TO_RIGHT;
    158 }
    159 
    160 #if defined(WCHAR_T_IS_UTF32)
    161 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
    162   return GetFirstStrongCharacterDirection(WideToUTF16(text));
    163 }
    164 #endif
    165 
    166 #if defined(OS_WIN)
    167 bool AdjustStringForLocaleDirection(string16* text) {
    168   if (!IsRTL() || text->empty())
    169     return false;
    170 
    171   // Marking the string as LTR if the locale is RTL and the string does not
    172   // contain strong RTL characters. Otherwise, mark the string as RTL.
    173   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
    174   if (!has_rtl_chars)
    175     WrapStringWithLTRFormatting(text);
    176   else
    177     WrapStringWithRTLFormatting(text);
    178 
    179   return true;
    180 }
    181 #else
    182 bool AdjustStringForLocaleDirection(string16* text) {
    183   // On OS X & GTK the directionality of a label is determined by the first
    184   // strongly directional character.
    185   // However, we want to make sure that in an LTR-language-UI all strings are
    186   // left aligned and vice versa.
    187   // A problem can arise if we display a string which starts with user input.
    188   // User input may be of the opposite directionality to the UI. So the whole
    189   // string will be displayed in the opposite directionality, e.g. if we want to
    190   // display in an LTR UI [such as US English]:
    191   //
    192   // EMAN_NOISNETXE is now installed.
    193   //
    194   // Since EXTENSION_NAME begins with a strong RTL char, the label's
    195   // directionality will be set to RTL and the string will be displayed visually
    196   // as:
    197   //
    198   // .is now installed EMAN_NOISNETXE
    199   //
    200   // In order to solve this issue, we prepend an LRM to the string. An LRM is a
    201   // strongly directional LTR char.
    202   // We also append an LRM at the end, which ensures that we're in an LTR
    203   // context.
    204 
    205   // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
    206   // box so there is no issue with displaying zero-width bidi control characters
    207   // on any system.  Thus no need for the !IsRTL() check here.
    208   if (text->empty())
    209     return false;
    210 
    211   bool ui_direction_is_rtl = IsRTL();
    212 
    213   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
    214   if (!ui_direction_is_rtl && has_rtl_chars) {
    215     WrapStringWithRTLFormatting(text);
    216     text->insert(0, 1, kLeftToRightMark);
    217     text->push_back(kLeftToRightMark);
    218   } else if (ui_direction_is_rtl && has_rtl_chars) {
    219     WrapStringWithRTLFormatting(text);
    220     text->insert(0, 1, kRightToLeftMark);
    221     text->push_back(kRightToLeftMark);
    222   } else if (ui_direction_is_rtl) {
    223     WrapStringWithLTRFormatting(text);
    224     text->insert(0, 1, kRightToLeftMark);
    225     text->push_back(kRightToLeftMark);
    226   }
    227 
    228   return true;
    229 }
    230 
    231 #endif  // !OS_WIN
    232 
    233 #if defined(WCHAR_T_IS_UTF32)
    234 bool AdjustStringForLocaleDirection(std::wstring* text) {
    235   string16 temp = WideToUTF16(*text);
    236   if (AdjustStringForLocaleDirection(&temp)) {
    237     // We should only touch the output on success.
    238     *text = UTF16ToWide(temp);
    239     return true;
    240   }
    241   return false;
    242 }
    243 #endif
    244 
    245 bool StringContainsStrongRTLChars(const string16& text) {
    246   const UChar* string = text.c_str();
    247   size_t length = text.length();
    248   size_t position = 0;
    249   while (position < length) {
    250     UChar32 character;
    251     size_t next_position = position;
    252     U16_NEXT(string, next_position, length, character);
    253 
    254     // Now that we have the character, we use ICU in order to query for the
    255     // appropriate Unicode BiDi character type.
    256     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
    257     if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
    258       return true;
    259 
    260     position = next_position;
    261   }
    262 
    263   return false;
    264 }
    265 
    266 #if defined(WCHAR_T_IS_UTF32)
    267 bool StringContainsStrongRTLChars(const std::wstring& text) {
    268   return StringContainsStrongRTLChars(WideToUTF16(text));
    269 }
    270 #endif
    271 
    272 void WrapStringWithLTRFormatting(string16* text) {
    273   if (text->empty())
    274     return;
    275 
    276   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
    277   text->insert(0, 1, kLeftToRightEmbeddingMark);
    278 
    279   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    280   text->push_back(kPopDirectionalFormatting);
    281 }
    282 
    283 #if defined(WCHAR_T_IS_UTF32)
    284 void WrapStringWithLTRFormatting(std::wstring* text) {
    285   if (text->empty())
    286     return;
    287 
    288   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
    289   text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark));
    290 
    291   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    292   text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
    293 }
    294 #endif
    295 
    296 void WrapStringWithRTLFormatting(string16* text) {
    297   if (text->empty())
    298     return;
    299 
    300   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
    301   text->insert(0, 1, kRightToLeftEmbeddingMark);
    302 
    303   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    304   text->push_back(kPopDirectionalFormatting);
    305 }
    306 
    307 #if defined(WCHAR_T_IS_UTF32)
    308 void WrapStringWithRTLFormatting(std::wstring* text) {
    309   if (text->empty())
    310     return;
    311 
    312   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
    313   text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark));
    314 
    315   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    316   text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
    317 }
    318 #endif
    319 
    320 void WrapPathWithLTRFormatting(const FilePath& path,
    321                                string16* rtl_safe_path) {
    322   // Wrap the overall path with LRE-PDF pair which essentialy marks the
    323   // string as a Left-To-Right string.
    324   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
    325   rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
    326 #if defined(OS_MACOSX)
    327     rtl_safe_path->append(UTF8ToUTF16(path.value()));
    328 #elif defined(OS_WIN)
    329     rtl_safe_path->append(path.value());
    330 #else  // defined(OS_POSIX) && !defined(OS_MACOSX)
    331     std::wstring wide_path = base::SysNativeMBToWide(path.value());
    332     rtl_safe_path->append(WideToUTF16(wide_path));
    333 #endif
    334   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    335   rtl_safe_path->push_back(kPopDirectionalFormatting);
    336 }
    337 
    338 string16 GetDisplayStringInLTRDirectionality(const string16& text) {
    339   if (!IsRTL())
    340     return text;
    341   string16 text_mutable(text);
    342   WrapStringWithLTRFormatting(&text_mutable);
    343   return text_mutable;
    344 }
    345 
    346 const string16 StripWrappingBidiControlCharacters(const string16& text) {
    347   if (text.empty())
    348     return text;
    349   size_t begin_index = 0;
    350   char16 begin = text[begin_index];
    351   if (begin == kLeftToRightEmbeddingMark ||
    352       begin == kRightToLeftEmbeddingMark ||
    353       begin == kLeftToRightOverride ||
    354       begin == kRightToLeftOverride)
    355     ++begin_index;
    356   size_t end_index = text.length() - 1;
    357   if (text[end_index] == kPopDirectionalFormatting)
    358     --end_index;
    359   return text.substr(begin_index, end_index - begin_index + 1);
    360 }
    361 
    362 }  // namespace i18n
    363 }  // namespace base
    364