1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/rtl.h" 6 7 #include "base/files/file_path.h" 8 #include "base/logging.h" 9 #include "base/strings/string_util.h" 10 #include "base/strings/sys_string_conversions.h" 11 #include "base/strings/utf_string_conversions.h" 12 #include "third_party/icu/source/common/unicode/locid.h" 13 #include "third_party/icu/source/common/unicode/uchar.h" 14 #include "third_party/icu/source/common/unicode/uscript.h" 15 #include "third_party/icu/source/i18n/unicode/coll.h" 16 17 #if defined(TOOLKIT_GTK) 18 #include <gtk/gtk.h> 19 #endif 20 21 namespace { 22 23 // Extract language, country and variant, but ignore keywords. For example, 24 // en-US, ca@valencia, ca-ES@valencia. 25 std::string GetLocaleString(const icu::Locale& locale) { 26 const char* language = locale.getLanguage(); 27 const char* country = locale.getCountry(); 28 const char* variant = locale.getVariant(); 29 30 std::string result = 31 (language != NULL && *language != '\0') ? language : "und"; 32 33 if (country != NULL && *country != '\0') { 34 result += '-'; 35 result += country; 36 } 37 38 if (variant != NULL && *variant != '\0') { 39 std::string variant_str(variant); 40 StringToLowerASCII(&variant_str); 41 result += '@' + variant_str; 42 } 43 44 return result; 45 } 46 47 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong 48 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to 49 // http://unicode.org/reports/tr9/ for more information. 50 base::i18n::TextDirection GetCharacterDirection(UChar32 character) { 51 // Now that we have the character, we use ICU in order to query for the 52 // appropriate Unicode BiDi character type. 53 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 54 if ((property == U_RIGHT_TO_LEFT) || 55 (property == U_RIGHT_TO_LEFT_ARABIC) || 56 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 57 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 58 return base::i18n::RIGHT_TO_LEFT; 59 } else if ((property == U_LEFT_TO_RIGHT) || 60 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 61 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 62 return base::i18n::LEFT_TO_RIGHT; 63 } 64 return base::i18n::UNKNOWN_DIRECTION; 65 } 66 67 } // namespace 68 69 namespace base { 70 namespace i18n { 71 72 // Represents the locale-specific ICU text direction. 73 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 74 75 // Convert the ICU default locale to a string. 76 std::string GetConfiguredLocale() { 77 return GetLocaleString(icu::Locale::getDefault()); 78 } 79 80 // Convert the ICU canonicalized locale to a string. 81 std::string GetCanonicalLocale(const char* locale) { 82 return GetLocaleString(icu::Locale::createCanonical(locale)); 83 } 84 85 // Convert Chrome locale name to ICU locale name 86 std::string ICULocaleName(const std::string& locale_string) { 87 // If not Spanish, just return it. 88 if (locale_string.substr(0, 2) != "es") 89 return locale_string; 90 // Expand es to es-ES. 91 if (LowerCaseEqualsASCII(locale_string, "es")) 92 return "es-ES"; 93 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 94 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 95 // to es-MX (the most populous in Spanish-speaking Latin America). 96 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 97 const icu::Locale& locale = icu::Locale::getDefault(); 98 std::string language = locale.getLanguage(); 99 const char* country = locale.getCountry(); 100 if (LowerCaseEqualsASCII(language, "es") && 101 !LowerCaseEqualsASCII(country, "es")) { 102 language += '-'; 103 language += country; 104 return language; 105 } 106 return "es-MX"; 107 } 108 // Currently, Chrome has only "es" and "es-419", but later we may have 109 // more specific "es-RR". 110 return locale_string; 111 } 112 113 void SetICUDefaultLocale(const std::string& locale_string) { 114 icu::Locale locale(ICULocaleName(locale_string).c_str()); 115 UErrorCode error_code = U_ZERO_ERROR; 116 icu::Locale::setDefault(locale, error_code); 117 // This return value is actually bogus because Locale object is 118 // an ID and setDefault seems to always succeed (regardless of the 119 // presence of actual locale data). However, 120 // it does not hurt to have it as a sanity check. 121 DCHECK(U_SUCCESS(error_code)); 122 g_icu_text_direction = UNKNOWN_DIRECTION; 123 } 124 125 bool IsRTL() { 126 #if defined(TOOLKIT_GTK) 127 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 128 return gtk_dir == GTK_TEXT_DIR_RTL; 129 #else 130 return ICUIsRTL(); 131 #endif 132 } 133 134 bool ICUIsRTL() { 135 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 136 const icu::Locale& locale = icu::Locale::getDefault(); 137 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 138 } 139 return g_icu_text_direction == RIGHT_TO_LEFT; 140 } 141 142 TextDirection GetTextDirectionForLocale(const char* locale_name) { 143 UErrorCode status = U_ZERO_ERROR; 144 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 145 DCHECK(U_SUCCESS(status)); 146 // Treat anything other than RTL as LTR. 147 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 148 } 149 150 TextDirection GetFirstStrongCharacterDirection(const string16& text) { 151 const UChar* string = text.c_str(); 152 size_t length = text.length(); 153 size_t position = 0; 154 while (position < length) { 155 UChar32 character; 156 size_t next_position = position; 157 U16_NEXT(string, next_position, length, character); 158 TextDirection direction = GetCharacterDirection(character); 159 if (direction != UNKNOWN_DIRECTION) 160 return direction; 161 position = next_position; 162 } 163 return LEFT_TO_RIGHT; 164 } 165 166 TextDirection GetStringDirection(const string16& text) { 167 const UChar* string = text.c_str(); 168 size_t length = text.length(); 169 size_t position = 0; 170 171 TextDirection result(UNKNOWN_DIRECTION); 172 while (position < length) { 173 UChar32 character; 174 size_t next_position = position; 175 U16_NEXT(string, next_position, length, character); 176 TextDirection direction = GetCharacterDirection(character); 177 if (direction != UNKNOWN_DIRECTION) { 178 if (result != UNKNOWN_DIRECTION && result != direction) 179 return UNKNOWN_DIRECTION; 180 result = direction; 181 } 182 position = next_position; 183 } 184 185 // Handle the case of a string not containing any strong directionality 186 // characters defaulting to LEFT_TO_RIGHT. 187 if (result == UNKNOWN_DIRECTION) 188 return LEFT_TO_RIGHT; 189 190 return result; 191 } 192 193 #if defined(OS_WIN) 194 bool AdjustStringForLocaleDirection(string16* text) { 195 if (!IsRTL() || text->empty()) 196 return false; 197 198 // Marking the string as LTR if the locale is RTL and the string does not 199 // contain strong RTL characters. Otherwise, mark the string as RTL. 200 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 201 if (!has_rtl_chars) 202 WrapStringWithLTRFormatting(text); 203 else 204 WrapStringWithRTLFormatting(text); 205 206 return true; 207 } 208 209 bool UnadjustStringForLocaleDirection(string16* text) { 210 if (!IsRTL() || text->empty()) 211 return false; 212 213 *text = StripWrappingBidiControlCharacters(*text); 214 return true; 215 } 216 #else 217 bool AdjustStringForLocaleDirection(string16* text) { 218 // On OS X & GTK the directionality of a label is determined by the first 219 // strongly directional character. 220 // However, we want to make sure that in an LTR-language-UI all strings are 221 // left aligned and vice versa. 222 // A problem can arise if we display a string which starts with user input. 223 // User input may be of the opposite directionality to the UI. So the whole 224 // string will be displayed in the opposite directionality, e.g. if we want to 225 // display in an LTR UI [such as US English]: 226 // 227 // EMAN_NOISNETXE is now installed. 228 // 229 // Since EXTENSION_NAME begins with a strong RTL char, the label's 230 // directionality will be set to RTL and the string will be displayed visually 231 // as: 232 // 233 // .is now installed EMAN_NOISNETXE 234 // 235 // In order to solve this issue, we prepend an LRM to the string. An LRM is a 236 // strongly directional LTR char. 237 // We also append an LRM at the end, which ensures that we're in an LTR 238 // context. 239 240 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the 241 // box so there is no issue with displaying zero-width bidi control characters 242 // on any system. Thus no need for the !IsRTL() check here. 243 if (text->empty()) 244 return false; 245 246 bool ui_direction_is_rtl = IsRTL(); 247 248 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 249 if (!ui_direction_is_rtl && has_rtl_chars) { 250 WrapStringWithRTLFormatting(text); 251 text->insert(0U, 1U, kLeftToRightMark); 252 text->push_back(kLeftToRightMark); 253 } else if (ui_direction_is_rtl && has_rtl_chars) { 254 WrapStringWithRTLFormatting(text); 255 text->insert(0U, 1U, kRightToLeftMark); 256 text->push_back(kRightToLeftMark); 257 } else if (ui_direction_is_rtl) { 258 WrapStringWithLTRFormatting(text); 259 text->insert(0U, 1U, kRightToLeftMark); 260 text->push_back(kRightToLeftMark); 261 } else { 262 return false; 263 } 264 265 return true; 266 } 267 268 bool UnadjustStringForLocaleDirection(string16* text) { 269 if (text->empty()) 270 return false; 271 272 size_t begin_index = 0; 273 char16 begin = text->at(begin_index); 274 if (begin == kLeftToRightMark || 275 begin == kRightToLeftMark) { 276 ++begin_index; 277 } 278 279 size_t end_index = text->length() - 1; 280 char16 end = text->at(end_index); 281 if (end == kLeftToRightMark || 282 end == kRightToLeftMark) { 283 --end_index; 284 } 285 286 string16 unmarked_text = 287 text->substr(begin_index, end_index - begin_index + 1); 288 *text = StripWrappingBidiControlCharacters(unmarked_text); 289 return true; 290 } 291 292 #endif // !OS_WIN 293 294 bool StringContainsStrongRTLChars(const string16& text) { 295 const UChar* string = text.c_str(); 296 size_t length = text.length(); 297 size_t position = 0; 298 while (position < length) { 299 UChar32 character; 300 size_t next_position = position; 301 U16_NEXT(string, next_position, length, character); 302 303 // Now that we have the character, we use ICU in order to query for the 304 // appropriate Unicode BiDi character type. 305 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 306 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 307 return true; 308 309 position = next_position; 310 } 311 312 return false; 313 } 314 315 void WrapStringWithLTRFormatting(string16* text) { 316 if (text->empty()) 317 return; 318 319 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 320 text->insert(0U, 1U, kLeftToRightEmbeddingMark); 321 322 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 323 text->push_back(kPopDirectionalFormatting); 324 } 325 326 void WrapStringWithRTLFormatting(string16* text) { 327 if (text->empty()) 328 return; 329 330 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 331 text->insert(0U, 1U, kRightToLeftEmbeddingMark); 332 333 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 334 text->push_back(kPopDirectionalFormatting); 335 } 336 337 void WrapPathWithLTRFormatting(const FilePath& path, 338 string16* rtl_safe_path) { 339 // Wrap the overall path with LRE-PDF pair which essentialy marks the 340 // string as a Left-To-Right string. 341 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 342 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 343 #if defined(OS_MACOSX) 344 rtl_safe_path->append(UTF8ToUTF16(path.value())); 345 #elif defined(OS_WIN) 346 rtl_safe_path->append(path.value()); 347 #else // defined(OS_POSIX) && !defined(OS_MACOSX) 348 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 349 rtl_safe_path->append(WideToUTF16(wide_path)); 350 #endif 351 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 352 rtl_safe_path->push_back(kPopDirectionalFormatting); 353 } 354 355 string16 GetDisplayStringInLTRDirectionality(const string16& text) { 356 // Always wrap the string in RTL UI (it may be appended to RTL string). 357 // Also wrap strings with an RTL first strong character direction in LTR UI. 358 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) { 359 string16 text_mutable(text); 360 WrapStringWithLTRFormatting(&text_mutable); 361 return text_mutable; 362 } 363 return text; 364 } 365 366 string16 StripWrappingBidiControlCharacters(const string16& text) { 367 if (text.empty()) 368 return text; 369 size_t begin_index = 0; 370 char16 begin = text[begin_index]; 371 if (begin == kLeftToRightEmbeddingMark || 372 begin == kRightToLeftEmbeddingMark || 373 begin == kLeftToRightOverride || 374 begin == kRightToLeftOverride) 375 ++begin_index; 376 size_t end_index = text.length() - 1; 377 if (text[end_index] == kPopDirectionalFormatting) 378 --end_index; 379 return text.substr(begin_index, end_index - begin_index + 1); 380 } 381 382 } // namespace i18n 383 } // namespace base 384