Home | History | Annotate | Download | only in i18n
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/i18n/rtl.h"
      6 
      7 #include "base/files/file_path.h"
      8 #include "base/logging.h"
      9 #include "base/strings/string_util.h"
     10 #include "base/strings/sys_string_conversions.h"
     11 #include "base/strings/utf_string_conversions.h"
     12 #include "third_party/icu/source/common/unicode/locid.h"
     13 #include "third_party/icu/source/common/unicode/uchar.h"
     14 #include "third_party/icu/source/common/unicode/uscript.h"
     15 #include "third_party/icu/source/i18n/unicode/coll.h"
     16 
     17 #if defined(TOOLKIT_GTK)
     18 #include <gtk/gtk.h>
     19 #endif
     20 
     21 namespace {
     22 
     23 // Extract language, country and variant, but ignore keywords.  For example,
     24 // en-US, ca@valencia, ca-ES@valencia.
     25 std::string GetLocaleString(const icu::Locale& locale) {
     26   const char* language = locale.getLanguage();
     27   const char* country = locale.getCountry();
     28   const char* variant = locale.getVariant();
     29 
     30   std::string result =
     31       (language != NULL && *language != '\0') ? language : "und";
     32 
     33   if (country != NULL && *country != '\0') {
     34     result += '-';
     35     result += country;
     36   }
     37 
     38   if (variant != NULL && *variant != '\0') {
     39     std::string variant_str(variant);
     40     StringToLowerASCII(&variant_str);
     41     result += '@' + variant_str;
     42   }
     43 
     44   return result;
     45 }
     46 
     47 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
     48 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
     49 // http://unicode.org/reports/tr9/ for more information.
     50 base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
     51   // Now that we have the character, we use ICU in order to query for the
     52   // appropriate Unicode BiDi character type.
     53   int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
     54   if ((property == U_RIGHT_TO_LEFT) ||
     55       (property == U_RIGHT_TO_LEFT_ARABIC) ||
     56       (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
     57       (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
     58     return base::i18n::RIGHT_TO_LEFT;
     59   } else if ((property == U_LEFT_TO_RIGHT) ||
     60              (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
     61              (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
     62     return base::i18n::LEFT_TO_RIGHT;
     63   }
     64   return base::i18n::UNKNOWN_DIRECTION;
     65 }
     66 
     67 }  // namespace
     68 
     69 namespace base {
     70 namespace i18n {
     71 
     72 // Represents the locale-specific ICU text direction.
     73 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
     74 
     75 // Convert the ICU default locale to a string.
     76 std::string GetConfiguredLocale() {
     77   return GetLocaleString(icu::Locale::getDefault());
     78 }
     79 
     80 // Convert the ICU canonicalized locale to a string.
     81 std::string GetCanonicalLocale(const char* locale) {
     82   return GetLocaleString(icu::Locale::createCanonical(locale));
     83 }
     84 
     85 // Convert Chrome locale name to ICU locale name
     86 std::string ICULocaleName(const std::string& locale_string) {
     87   // If not Spanish, just return it.
     88   if (locale_string.substr(0, 2) != "es")
     89     return locale_string;
     90   // Expand es to es-ES.
     91   if (LowerCaseEqualsASCII(locale_string, "es"))
     92     return "es-ES";
     93   // Map es-419 (Latin American Spanish) to es-FOO depending on the system
     94   // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
     95   // to es-MX (the most populous in Spanish-speaking Latin America).
     96   if (LowerCaseEqualsASCII(locale_string, "es-419")) {
     97     const icu::Locale& locale = icu::Locale::getDefault();
     98     std::string language = locale.getLanguage();
     99     const char* country = locale.getCountry();
    100     if (LowerCaseEqualsASCII(language, "es") &&
    101       !LowerCaseEqualsASCII(country, "es")) {
    102         language += '-';
    103         language += country;
    104         return language;
    105     }
    106     return "es-MX";
    107   }
    108   // Currently, Chrome has only "es" and "es-419", but later we may have
    109   // more specific "es-RR".
    110   return locale_string;
    111 }
    112 
    113 void SetICUDefaultLocale(const std::string& locale_string) {
    114   icu::Locale locale(ICULocaleName(locale_string).c_str());
    115   UErrorCode error_code = U_ZERO_ERROR;
    116   icu::Locale::setDefault(locale, error_code);
    117   // This return value is actually bogus because Locale object is
    118   // an ID and setDefault seems to always succeed (regardless of the
    119   // presence of actual locale data). However,
    120   // it does not hurt to have it as a sanity check.
    121   DCHECK(U_SUCCESS(error_code));
    122   g_icu_text_direction = UNKNOWN_DIRECTION;
    123 }
    124 
    125 bool IsRTL() {
    126 #if defined(TOOLKIT_GTK)
    127   GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
    128   return gtk_dir == GTK_TEXT_DIR_RTL;
    129 #else
    130   return ICUIsRTL();
    131 #endif
    132 }
    133 
    134 bool ICUIsRTL() {
    135   if (g_icu_text_direction == UNKNOWN_DIRECTION) {
    136     const icu::Locale& locale = icu::Locale::getDefault();
    137     g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
    138   }
    139   return g_icu_text_direction == RIGHT_TO_LEFT;
    140 }
    141 
    142 TextDirection GetTextDirectionForLocale(const char* locale_name) {
    143   UErrorCode status = U_ZERO_ERROR;
    144   ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
    145   DCHECK(U_SUCCESS(status));
    146   // Treat anything other than RTL as LTR.
    147   return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
    148 }
    149 
    150 TextDirection GetFirstStrongCharacterDirection(const string16& text) {
    151   const UChar* string = text.c_str();
    152   size_t length = text.length();
    153   size_t position = 0;
    154   while (position < length) {
    155     UChar32 character;
    156     size_t next_position = position;
    157     U16_NEXT(string, next_position, length, character);
    158     TextDirection direction = GetCharacterDirection(character);
    159     if (direction != UNKNOWN_DIRECTION)
    160       return direction;
    161     position = next_position;
    162   }
    163   return LEFT_TO_RIGHT;
    164 }
    165 
    166 TextDirection GetStringDirection(const string16& text) {
    167   const UChar* string = text.c_str();
    168   size_t length = text.length();
    169   size_t position = 0;
    170 
    171   TextDirection result(UNKNOWN_DIRECTION);
    172   while (position < length) {
    173     UChar32 character;
    174     size_t next_position = position;
    175     U16_NEXT(string, next_position, length, character);
    176     TextDirection direction = GetCharacterDirection(character);
    177     if (direction != UNKNOWN_DIRECTION) {
    178       if (result != UNKNOWN_DIRECTION && result != direction)
    179         return UNKNOWN_DIRECTION;
    180       result = direction;
    181     }
    182     position = next_position;
    183   }
    184 
    185   // Handle the case of a string not containing any strong directionality
    186   // characters defaulting to LEFT_TO_RIGHT.
    187   if (result == UNKNOWN_DIRECTION)
    188     return LEFT_TO_RIGHT;
    189 
    190   return result;
    191 }
    192 
    193 #if defined(OS_WIN)
    194 bool AdjustStringForLocaleDirection(string16* text) {
    195   if (!IsRTL() || text->empty())
    196     return false;
    197 
    198   // Marking the string as LTR if the locale is RTL and the string does not
    199   // contain strong RTL characters. Otherwise, mark the string as RTL.
    200   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
    201   if (!has_rtl_chars)
    202     WrapStringWithLTRFormatting(text);
    203   else
    204     WrapStringWithRTLFormatting(text);
    205 
    206   return true;
    207 }
    208 
    209 bool UnadjustStringForLocaleDirection(string16* text) {
    210   if (!IsRTL() || text->empty())
    211     return false;
    212 
    213   *text = StripWrappingBidiControlCharacters(*text);
    214   return true;
    215 }
    216 #else
    217 bool AdjustStringForLocaleDirection(string16* text) {
    218   // On OS X & GTK the directionality of a label is determined by the first
    219   // strongly directional character.
    220   // However, we want to make sure that in an LTR-language-UI all strings are
    221   // left aligned and vice versa.
    222   // A problem can arise if we display a string which starts with user input.
    223   // User input may be of the opposite directionality to the UI. So the whole
    224   // string will be displayed in the opposite directionality, e.g. if we want to
    225   // display in an LTR UI [such as US English]:
    226   //
    227   // EMAN_NOISNETXE is now installed.
    228   //
    229   // Since EXTENSION_NAME begins with a strong RTL char, the label's
    230   // directionality will be set to RTL and the string will be displayed visually
    231   // as:
    232   //
    233   // .is now installed EMAN_NOISNETXE
    234   //
    235   // In order to solve this issue, we prepend an LRM to the string. An LRM is a
    236   // strongly directional LTR char.
    237   // We also append an LRM at the end, which ensures that we're in an LTR
    238   // context.
    239 
    240   // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
    241   // box so there is no issue with displaying zero-width bidi control characters
    242   // on any system.  Thus no need for the !IsRTL() check here.
    243   if (text->empty())
    244     return false;
    245 
    246   bool ui_direction_is_rtl = IsRTL();
    247 
    248   bool has_rtl_chars = StringContainsStrongRTLChars(*text);
    249   if (!ui_direction_is_rtl && has_rtl_chars) {
    250     WrapStringWithRTLFormatting(text);
    251     text->insert(0U, 1U, kLeftToRightMark);
    252     text->push_back(kLeftToRightMark);
    253   } else if (ui_direction_is_rtl && has_rtl_chars) {
    254     WrapStringWithRTLFormatting(text);
    255     text->insert(0U, 1U, kRightToLeftMark);
    256     text->push_back(kRightToLeftMark);
    257   } else if (ui_direction_is_rtl) {
    258     WrapStringWithLTRFormatting(text);
    259     text->insert(0U, 1U, kRightToLeftMark);
    260     text->push_back(kRightToLeftMark);
    261   } else {
    262     return false;
    263   }
    264 
    265   return true;
    266 }
    267 
    268 bool UnadjustStringForLocaleDirection(string16* text) {
    269   if (text->empty())
    270     return false;
    271 
    272   size_t begin_index = 0;
    273   char16 begin = text->at(begin_index);
    274   if (begin == kLeftToRightMark ||
    275       begin == kRightToLeftMark) {
    276     ++begin_index;
    277   }
    278 
    279   size_t end_index = text->length() - 1;
    280   char16 end = text->at(end_index);
    281   if (end == kLeftToRightMark ||
    282       end == kRightToLeftMark) {
    283     --end_index;
    284   }
    285 
    286   string16 unmarked_text =
    287       text->substr(begin_index, end_index - begin_index + 1);
    288   *text = StripWrappingBidiControlCharacters(unmarked_text);
    289   return true;
    290 }
    291 
    292 #endif  // !OS_WIN
    293 
    294 bool StringContainsStrongRTLChars(const string16& text) {
    295   const UChar* string = text.c_str();
    296   size_t length = text.length();
    297   size_t position = 0;
    298   while (position < length) {
    299     UChar32 character;
    300     size_t next_position = position;
    301     U16_NEXT(string, next_position, length, character);
    302 
    303     // Now that we have the character, we use ICU in order to query for the
    304     // appropriate Unicode BiDi character type.
    305     int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
    306     if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
    307       return true;
    308 
    309     position = next_position;
    310   }
    311 
    312   return false;
    313 }
    314 
    315 void WrapStringWithLTRFormatting(string16* text) {
    316   if (text->empty())
    317     return;
    318 
    319   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
    320   text->insert(0U, 1U, kLeftToRightEmbeddingMark);
    321 
    322   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    323   text->push_back(kPopDirectionalFormatting);
    324 }
    325 
    326 void WrapStringWithRTLFormatting(string16* text) {
    327   if (text->empty())
    328     return;
    329 
    330   // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
    331   text->insert(0U, 1U, kRightToLeftEmbeddingMark);
    332 
    333   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    334   text->push_back(kPopDirectionalFormatting);
    335 }
    336 
    337 void WrapPathWithLTRFormatting(const FilePath& path,
    338                                string16* rtl_safe_path) {
    339   // Wrap the overall path with LRE-PDF pair which essentialy marks the
    340   // string as a Left-To-Right string.
    341   // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
    342   rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
    343 #if defined(OS_MACOSX)
    344     rtl_safe_path->append(UTF8ToUTF16(path.value()));
    345 #elif defined(OS_WIN)
    346     rtl_safe_path->append(path.value());
    347 #else  // defined(OS_POSIX) && !defined(OS_MACOSX)
    348     std::wstring wide_path = base::SysNativeMBToWide(path.value());
    349     rtl_safe_path->append(WideToUTF16(wide_path));
    350 #endif
    351   // Inserting a PDF (Pop Directional Formatting) mark as the last character.
    352   rtl_safe_path->push_back(kPopDirectionalFormatting);
    353 }
    354 
    355 string16 GetDisplayStringInLTRDirectionality(const string16& text) {
    356   // Always wrap the string in RTL UI (it may be appended to RTL string).
    357   // Also wrap strings with an RTL first strong character direction in LTR UI.
    358   if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
    359     string16 text_mutable(text);
    360     WrapStringWithLTRFormatting(&text_mutable);
    361     return text_mutable;
    362   }
    363   return text;
    364 }
    365 
    366 string16 StripWrappingBidiControlCharacters(const string16& text) {
    367   if (text.empty())
    368     return text;
    369   size_t begin_index = 0;
    370   char16 begin = text[begin_index];
    371   if (begin == kLeftToRightEmbeddingMark ||
    372       begin == kRightToLeftEmbeddingMark ||
    373       begin == kLeftToRightOverride ||
    374       begin == kRightToLeftOverride)
    375     ++begin_index;
    376   size_t end_index = text.length() - 1;
    377   if (text[end_index] == kPopDirectionalFormatting)
    378     --end_index;
    379   return text.substr(begin_index, end_index - begin_index + 1);
    380 }
    381 
    382 }  // namespace i18n
    383 }  // namespace base
    384