1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/rtl.h" 6 7 #include "base/files/file_path.h" 8 #include "base/logging.h" 9 #include "base/strings/string_util.h" 10 #include "base/strings/sys_string_conversions.h" 11 #include "base/strings/utf_string_conversions.h" 12 #include "third_party/icu/source/common/unicode/locid.h" 13 #include "third_party/icu/source/common/unicode/uchar.h" 14 #include "third_party/icu/source/common/unicode/uscript.h" 15 #include "third_party/icu/source/i18n/unicode/coll.h" 16 17 namespace { 18 19 // Extract language, country and variant, but ignore keywords. For example, 20 // en-US, ca@valencia, ca-ES@valencia. 21 std::string GetLocaleString(const icu::Locale& locale) { 22 const char* language = locale.getLanguage(); 23 const char* country = locale.getCountry(); 24 const char* variant = locale.getVariant(); 25 26 std::string result = 27 (language != NULL && *language != '\0') ? language : "und"; 28 29 if (country != NULL && *country != '\0') { 30 result += '-'; 31 result += country; 32 } 33 34 if (variant != NULL && *variant != '\0') { 35 std::string variant_str(variant); 36 StringToLowerASCII(&variant_str); 37 result += '@' + variant_str; 38 } 39 40 return result; 41 } 42 43 // Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong 44 // directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to 45 // http://unicode.org/reports/tr9/ for more information. 46 base::i18n::TextDirection GetCharacterDirection(UChar32 character) { 47 // Now that we have the character, we use ICU in order to query for the 48 // appropriate Unicode BiDi character type. 49 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 50 if ((property == U_RIGHT_TO_LEFT) || 51 (property == U_RIGHT_TO_LEFT_ARABIC) || 52 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 53 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 54 return base::i18n::RIGHT_TO_LEFT; 55 } else if ((property == U_LEFT_TO_RIGHT) || 56 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 57 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 58 return base::i18n::LEFT_TO_RIGHT; 59 } 60 return base::i18n::UNKNOWN_DIRECTION; 61 } 62 63 } // namespace 64 65 namespace base { 66 namespace i18n { 67 68 // Represents the locale-specific ICU text direction. 69 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 70 71 // Convert the ICU default locale to a string. 72 std::string GetConfiguredLocale() { 73 return GetLocaleString(icu::Locale::getDefault()); 74 } 75 76 // Convert the ICU canonicalized locale to a string. 77 std::string GetCanonicalLocale(const char* locale) { 78 return GetLocaleString(icu::Locale::createCanonical(locale)); 79 } 80 81 // Convert Chrome locale name to ICU locale name 82 std::string ICULocaleName(const std::string& locale_string) { 83 // If not Spanish, just return it. 84 if (locale_string.substr(0, 2) != "es") 85 return locale_string; 86 // Expand es to es-ES. 87 if (LowerCaseEqualsASCII(locale_string, "es")) 88 return "es-ES"; 89 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 90 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 91 // to es-MX (the most populous in Spanish-speaking Latin America). 92 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 93 const icu::Locale& locale = icu::Locale::getDefault(); 94 std::string language = locale.getLanguage(); 95 const char* country = locale.getCountry(); 96 if (LowerCaseEqualsASCII(language, "es") && 97 !LowerCaseEqualsASCII(country, "es")) { 98 language += '-'; 99 language += country; 100 return language; 101 } 102 return "es-MX"; 103 } 104 // Currently, Chrome has only "es" and "es-419", but later we may have 105 // more specific "es-RR". 106 return locale_string; 107 } 108 109 void SetICUDefaultLocale(const std::string& locale_string) { 110 icu::Locale locale(ICULocaleName(locale_string).c_str()); 111 UErrorCode error_code = U_ZERO_ERROR; 112 icu::Locale::setDefault(locale, error_code); 113 // This return value is actually bogus because Locale object is 114 // an ID and setDefault seems to always succeed (regardless of the 115 // presence of actual locale data). However, 116 // it does not hurt to have it as a sanity check. 117 DCHECK(U_SUCCESS(error_code)); 118 g_icu_text_direction = UNKNOWN_DIRECTION; 119 } 120 121 bool IsRTL() { 122 return ICUIsRTL(); 123 } 124 125 bool ICUIsRTL() { 126 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 127 const icu::Locale& locale = icu::Locale::getDefault(); 128 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 129 } 130 return g_icu_text_direction == RIGHT_TO_LEFT; 131 } 132 133 TextDirection GetTextDirectionForLocale(const char* locale_name) { 134 UErrorCode status = U_ZERO_ERROR; 135 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 136 DCHECK(U_SUCCESS(status)); 137 // Treat anything other than RTL as LTR. 138 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 139 } 140 141 TextDirection GetFirstStrongCharacterDirection(const string16& text) { 142 const UChar* string = text.c_str(); 143 size_t length = text.length(); 144 size_t position = 0; 145 while (position < length) { 146 UChar32 character; 147 size_t next_position = position; 148 U16_NEXT(string, next_position, length, character); 149 TextDirection direction = GetCharacterDirection(character); 150 if (direction != UNKNOWN_DIRECTION) 151 return direction; 152 position = next_position; 153 } 154 return LEFT_TO_RIGHT; 155 } 156 157 TextDirection GetLastStrongCharacterDirection(const string16& text) { 158 const UChar* string = text.c_str(); 159 size_t position = text.length(); 160 while (position > 0) { 161 UChar32 character; 162 size_t prev_position = position; 163 U16_PREV(string, 0, prev_position, character); 164 TextDirection direction = GetCharacterDirection(character); 165 if (direction != UNKNOWN_DIRECTION) 166 return direction; 167 position = prev_position; 168 } 169 return LEFT_TO_RIGHT; 170 } 171 172 TextDirection GetStringDirection(const string16& text) { 173 const UChar* string = text.c_str(); 174 size_t length = text.length(); 175 size_t position = 0; 176 177 TextDirection result(UNKNOWN_DIRECTION); 178 while (position < length) { 179 UChar32 character; 180 size_t next_position = position; 181 U16_NEXT(string, next_position, length, character); 182 TextDirection direction = GetCharacterDirection(character); 183 if (direction != UNKNOWN_DIRECTION) { 184 if (result != UNKNOWN_DIRECTION && result != direction) 185 return UNKNOWN_DIRECTION; 186 result = direction; 187 } 188 position = next_position; 189 } 190 191 // Handle the case of a string not containing any strong directionality 192 // characters defaulting to LEFT_TO_RIGHT. 193 if (result == UNKNOWN_DIRECTION) 194 return LEFT_TO_RIGHT; 195 196 return result; 197 } 198 199 #if defined(OS_WIN) 200 bool AdjustStringForLocaleDirection(string16* text) { 201 if (!IsRTL() || text->empty()) 202 return false; 203 204 // Marking the string as LTR if the locale is RTL and the string does not 205 // contain strong RTL characters. Otherwise, mark the string as RTL. 206 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 207 if (!has_rtl_chars) 208 WrapStringWithLTRFormatting(text); 209 else 210 WrapStringWithRTLFormatting(text); 211 212 return true; 213 } 214 215 bool UnadjustStringForLocaleDirection(string16* text) { 216 if (!IsRTL() || text->empty()) 217 return false; 218 219 *text = StripWrappingBidiControlCharacters(*text); 220 return true; 221 } 222 #else 223 bool AdjustStringForLocaleDirection(string16* text) { 224 // On OS X & GTK the directionality of a label is determined by the first 225 // strongly directional character. 226 // However, we want to make sure that in an LTR-language-UI all strings are 227 // left aligned and vice versa. 228 // A problem can arise if we display a string which starts with user input. 229 // User input may be of the opposite directionality to the UI. So the whole 230 // string will be displayed in the opposite directionality, e.g. if we want to 231 // display in an LTR UI [such as US English]: 232 // 233 // EMAN_NOISNETXE is now installed. 234 // 235 // Since EXTENSION_NAME begins with a strong RTL char, the label's 236 // directionality will be set to RTL and the string will be displayed visually 237 // as: 238 // 239 // .is now installed EMAN_NOISNETXE 240 // 241 // In order to solve this issue, we prepend an LRM to the string. An LRM is a 242 // strongly directional LTR char. 243 // We also append an LRM at the end, which ensures that we're in an LTR 244 // context. 245 246 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the 247 // box so there is no issue with displaying zero-width bidi control characters 248 // on any system. Thus no need for the !IsRTL() check here. 249 if (text->empty()) 250 return false; 251 252 bool ui_direction_is_rtl = IsRTL(); 253 254 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 255 if (!ui_direction_is_rtl && has_rtl_chars) { 256 WrapStringWithRTLFormatting(text); 257 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 258 kLeftToRightMark); 259 text->push_back(kLeftToRightMark); 260 } else if (ui_direction_is_rtl && has_rtl_chars) { 261 WrapStringWithRTLFormatting(text); 262 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 263 kRightToLeftMark); 264 text->push_back(kRightToLeftMark); 265 } else if (ui_direction_is_rtl) { 266 WrapStringWithLTRFormatting(text); 267 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 268 kRightToLeftMark); 269 text->push_back(kRightToLeftMark); 270 } else { 271 return false; 272 } 273 274 return true; 275 } 276 277 bool UnadjustStringForLocaleDirection(string16* text) { 278 if (text->empty()) 279 return false; 280 281 size_t begin_index = 0; 282 char16 begin = text->at(begin_index); 283 if (begin == kLeftToRightMark || 284 begin == kRightToLeftMark) { 285 ++begin_index; 286 } 287 288 size_t end_index = text->length() - 1; 289 char16 end = text->at(end_index); 290 if (end == kLeftToRightMark || 291 end == kRightToLeftMark) { 292 --end_index; 293 } 294 295 string16 unmarked_text = 296 text->substr(begin_index, end_index - begin_index + 1); 297 *text = StripWrappingBidiControlCharacters(unmarked_text); 298 return true; 299 } 300 301 #endif // !OS_WIN 302 303 bool StringContainsStrongRTLChars(const string16& text) { 304 const UChar* string = text.c_str(); 305 size_t length = text.length(); 306 size_t position = 0; 307 while (position < length) { 308 UChar32 character; 309 size_t next_position = position; 310 U16_NEXT(string, next_position, length, character); 311 312 // Now that we have the character, we use ICU in order to query for the 313 // appropriate Unicode BiDi character type. 314 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 315 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 316 return true; 317 318 position = next_position; 319 } 320 321 return false; 322 } 323 324 void WrapStringWithLTRFormatting(string16* text) { 325 if (text->empty()) 326 return; 327 328 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 329 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 330 kLeftToRightEmbeddingMark); 331 332 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 333 text->push_back(kPopDirectionalFormatting); 334 } 335 336 void WrapStringWithRTLFormatting(string16* text) { 337 if (text->empty()) 338 return; 339 340 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 341 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 342 kRightToLeftEmbeddingMark); 343 344 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 345 text->push_back(kPopDirectionalFormatting); 346 } 347 348 void WrapPathWithLTRFormatting(const FilePath& path, 349 string16* rtl_safe_path) { 350 // Wrap the overall path with LRE-PDF pair which essentialy marks the 351 // string as a Left-To-Right string. 352 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 353 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 354 #if defined(OS_MACOSX) 355 rtl_safe_path->append(UTF8ToUTF16(path.value())); 356 #elif defined(OS_WIN) 357 rtl_safe_path->append(path.value()); 358 #else // defined(OS_POSIX) && !defined(OS_MACOSX) 359 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 360 rtl_safe_path->append(WideToUTF16(wide_path)); 361 #endif 362 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 363 rtl_safe_path->push_back(kPopDirectionalFormatting); 364 } 365 366 string16 GetDisplayStringInLTRDirectionality(const string16& text) { 367 // Always wrap the string in RTL UI (it may be appended to RTL string). 368 // Also wrap strings with an RTL first strong character direction in LTR UI. 369 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) { 370 string16 text_mutable(text); 371 WrapStringWithLTRFormatting(&text_mutable); 372 return text_mutable; 373 } 374 return text; 375 } 376 377 string16 StripWrappingBidiControlCharacters(const string16& text) { 378 if (text.empty()) 379 return text; 380 size_t begin_index = 0; 381 char16 begin = text[begin_index]; 382 if (begin == kLeftToRightEmbeddingMark || 383 begin == kRightToLeftEmbeddingMark || 384 begin == kLeftToRightOverride || 385 begin == kRightToLeftOverride) 386 ++begin_index; 387 size_t end_index = text.length() - 1; 388 if (text[end_index] == kPopDirectionalFormatting) 389 --end_index; 390 return text.substr(begin_index, end_index - begin_index + 1); 391 } 392 393 } // namespace i18n 394 } // namespace base 395