1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/i18n/rtl.h" 6 7 #include "base/file_path.h" 8 #include "base/logging.h" 9 #include "base/string_util.h" 10 #include "base/utf_string_conversions.h" 11 #include "base/sys_string_conversions.h" 12 #include "unicode/coll.h" 13 #include "unicode/locid.h" 14 #include "unicode/uchar.h" 15 #include "unicode/uscript.h" 16 17 #if defined(TOOLKIT_USES_GTK) 18 #include <gtk/gtk.h> 19 #endif 20 21 namespace { 22 23 // Extract language and country, ignore keywords, concatenate using dash. 24 std::string GetLocaleString(const icu::Locale& locale) { 25 const char* language = locale.getLanguage(); 26 const char* country = locale.getCountry(); 27 28 std::string result = 29 (language != NULL && *language != '\0') ? language : "und"; 30 31 if (country != NULL && *country != '\0') { 32 result += '-'; 33 result += country; 34 } 35 36 return result; 37 } 38 39 } // namespace 40 41 namespace base { 42 namespace i18n { 43 44 // Represents the locale-specific ICU text direction. 45 static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 46 47 // Convert the ICU default locale to a string. 48 std::string GetConfiguredLocale() { 49 return GetLocaleString(icu::Locale::getDefault()); 50 } 51 52 // Convert the ICU canonicalized locale to a string. 53 std::string GetCanonicalLocale(const char* locale) { 54 return GetLocaleString(icu::Locale::createCanonical(locale)); 55 } 56 57 // Convert Chrome locale name to ICU locale name 58 std::string ICULocaleName(const std::string& locale_string) { 59 // If not Spanish, just return it. 60 if (locale_string.substr(0, 2) != "es") 61 return locale_string; 62 // Expand es to es-ES. 63 if (LowerCaseEqualsASCII(locale_string, "es")) 64 return "es-ES"; 65 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 66 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 67 // to es-MX (the most populous in Spanish-speaking Latin America). 68 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 69 const icu::Locale& locale = icu::Locale::getDefault(); 70 std::string language = locale.getLanguage(); 71 const char* country = locale.getCountry(); 72 if (LowerCaseEqualsASCII(language, "es") && 73 !LowerCaseEqualsASCII(country, "es")) { 74 language += '-'; 75 language += country; 76 return language; 77 } 78 return "es-MX"; 79 } 80 // Currently, Chrome has only "es" and "es-419", but later we may have 81 // more specific "es-RR". 82 return locale_string; 83 } 84 85 void SetICUDefaultLocale(const std::string& locale_string) { 86 icu::Locale locale(ICULocaleName(locale_string).c_str()); 87 UErrorCode error_code = U_ZERO_ERROR; 88 icu::Locale::setDefault(locale, error_code); 89 // This return value is actually bogus because Locale object is 90 // an ID and setDefault seems to always succeed (regardless of the 91 // presence of actual locale data). However, 92 // it does not hurt to have it as a sanity check. 93 DCHECK(U_SUCCESS(error_code)); 94 g_icu_text_direction = UNKNOWN_DIRECTION; 95 96 // If we use Views toolkit on top of GtkWidget, then we need to keep 97 // GtkWidget's default text direction consistent with ICU's text direction. 98 // Because in this case ICU's text direction will be used instead. 99 // See IsRTL() function below. 100 #if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK) 101 gtk_widget_set_default_direction( 102 ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); 103 #endif 104 } 105 106 bool IsRTL() { 107 #if defined(TOOLKIT_GTK) 108 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 109 return (gtk_dir == GTK_TEXT_DIR_RTL); 110 #else 111 return ICUIsRTL(); 112 #endif 113 } 114 115 bool ICUIsRTL() { 116 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 117 const icu::Locale& locale = icu::Locale::getDefault(); 118 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 119 } 120 return g_icu_text_direction == RIGHT_TO_LEFT; 121 } 122 123 TextDirection GetTextDirectionForLocale(const char* locale_name) { 124 UErrorCode status = U_ZERO_ERROR; 125 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 126 DCHECK(U_SUCCESS(status)); 127 // Treat anything other than RTL as LTR. 128 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 129 } 130 131 TextDirection GetFirstStrongCharacterDirection(const string16& text) { 132 const UChar* string = text.c_str(); 133 size_t length = text.length(); 134 size_t position = 0; 135 while (position < length) { 136 UChar32 character; 137 size_t next_position = position; 138 U16_NEXT(string, next_position, length, character); 139 140 // Now that we have the character, we use ICU in order to query for the 141 // appropriate Unicode BiDi character type. 142 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 143 if ((property == U_RIGHT_TO_LEFT) || 144 (property == U_RIGHT_TO_LEFT_ARABIC) || 145 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 146 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 147 return RIGHT_TO_LEFT; 148 } else if ((property == U_LEFT_TO_RIGHT) || 149 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 150 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 151 return LEFT_TO_RIGHT; 152 } 153 154 position = next_position; 155 } 156 157 return LEFT_TO_RIGHT; 158 } 159 160 #if defined(WCHAR_T_IS_UTF32) 161 TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { 162 return GetFirstStrongCharacterDirection(WideToUTF16(text)); 163 } 164 #endif 165 166 #if defined(OS_WIN) 167 bool AdjustStringForLocaleDirection(string16* text) { 168 if (!IsRTL() || text->empty()) 169 return false; 170 171 // Marking the string as LTR if the locale is RTL and the string does not 172 // contain strong RTL characters. Otherwise, mark the string as RTL. 173 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 174 if (!has_rtl_chars) 175 WrapStringWithLTRFormatting(text); 176 else 177 WrapStringWithRTLFormatting(text); 178 179 return true; 180 } 181 #else 182 bool AdjustStringForLocaleDirection(string16* text) { 183 // On OS X & GTK the directionality of a label is determined by the first 184 // strongly directional character. 185 // However, we want to make sure that in an LTR-language-UI all strings are 186 // left aligned and vice versa. 187 // A problem can arise if we display a string which starts with user input. 188 // User input may be of the opposite directionality to the UI. So the whole 189 // string will be displayed in the opposite directionality, e.g. if we want to 190 // display in an LTR UI [such as US English]: 191 // 192 // EMAN_NOISNETXE is now installed. 193 // 194 // Since EXTENSION_NAME begins with a strong RTL char, the label's 195 // directionality will be set to RTL and the string will be displayed visually 196 // as: 197 // 198 // .is now installed EMAN_NOISNETXE 199 // 200 // In order to solve this issue, we prepend an LRM to the string. An LRM is a 201 // strongly directional LTR char. 202 // We also append an LRM at the end, which ensures that we're in an LTR 203 // context. 204 205 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the 206 // box so there is no issue with displaying zero-width bidi control characters 207 // on any system. Thus no need for the !IsRTL() check here. 208 if (text->empty()) 209 return false; 210 211 bool ui_direction_is_rtl = IsRTL(); 212 213 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 214 if (!ui_direction_is_rtl && has_rtl_chars) { 215 WrapStringWithRTLFormatting(text); 216 text->insert(0, 1, kLeftToRightMark); 217 text->push_back(kLeftToRightMark); 218 } else if (ui_direction_is_rtl && has_rtl_chars) { 219 WrapStringWithRTLFormatting(text); 220 text->insert(0, 1, kRightToLeftMark); 221 text->push_back(kRightToLeftMark); 222 } else if (ui_direction_is_rtl) { 223 WrapStringWithLTRFormatting(text); 224 text->insert(0, 1, kRightToLeftMark); 225 text->push_back(kRightToLeftMark); 226 } 227 228 return true; 229 } 230 231 #endif // !OS_WIN 232 233 #if defined(WCHAR_T_IS_UTF32) 234 bool AdjustStringForLocaleDirection(std::wstring* text) { 235 string16 temp = WideToUTF16(*text); 236 if (AdjustStringForLocaleDirection(&temp)) { 237 // We should only touch the output on success. 238 *text = UTF16ToWide(temp); 239 return true; 240 } 241 return false; 242 } 243 #endif 244 245 bool StringContainsStrongRTLChars(const string16& text) { 246 const UChar* string = text.c_str(); 247 size_t length = text.length(); 248 size_t position = 0; 249 while (position < length) { 250 UChar32 character; 251 size_t next_position = position; 252 U16_NEXT(string, next_position, length, character); 253 254 // Now that we have the character, we use ICU in order to query for the 255 // appropriate Unicode BiDi character type. 256 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 257 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 258 return true; 259 260 position = next_position; 261 } 262 263 return false; 264 } 265 266 #if defined(WCHAR_T_IS_UTF32) 267 bool StringContainsStrongRTLChars(const std::wstring& text) { 268 return StringContainsStrongRTLChars(WideToUTF16(text)); 269 } 270 #endif 271 272 void WrapStringWithLTRFormatting(string16* text) { 273 if (text->empty()) 274 return; 275 276 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 277 text->insert(0, 1, kLeftToRightEmbeddingMark); 278 279 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 280 text->push_back(kPopDirectionalFormatting); 281 } 282 283 #if defined(WCHAR_T_IS_UTF32) 284 void WrapStringWithLTRFormatting(std::wstring* text) { 285 if (text->empty()) 286 return; 287 288 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 289 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); 290 291 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 292 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 293 } 294 #endif 295 296 void WrapStringWithRTLFormatting(string16* text) { 297 if (text->empty()) 298 return; 299 300 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 301 text->insert(0, 1, kRightToLeftEmbeddingMark); 302 303 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 304 text->push_back(kPopDirectionalFormatting); 305 } 306 307 #if defined(WCHAR_T_IS_UTF32) 308 void WrapStringWithRTLFormatting(std::wstring* text) { 309 if (text->empty()) 310 return; 311 312 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 313 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); 314 315 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 316 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 317 } 318 #endif 319 320 void WrapPathWithLTRFormatting(const FilePath& path, 321 string16* rtl_safe_path) { 322 // Wrap the overall path with LRE-PDF pair which essentialy marks the 323 // string as a Left-To-Right string. 324 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 325 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 326 #if defined(OS_MACOSX) 327 rtl_safe_path->append(UTF8ToUTF16(path.value())); 328 #elif defined(OS_WIN) 329 rtl_safe_path->append(path.value()); 330 #else // defined(OS_POSIX) && !defined(OS_MACOSX) 331 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 332 rtl_safe_path->append(WideToUTF16(wide_path)); 333 #endif 334 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 335 rtl_safe_path->push_back(kPopDirectionalFormatting); 336 } 337 338 string16 GetDisplayStringInLTRDirectionality(const string16& text) { 339 if (!IsRTL()) 340 return text; 341 string16 text_mutable(text); 342 WrapStringWithLTRFormatting(&text_mutable); 343 return text_mutable; 344 } 345 346 const string16 StripWrappingBidiControlCharacters(const string16& text) { 347 if (text.empty()) 348 return text; 349 size_t begin_index = 0; 350 char16 begin = text[begin_index]; 351 if (begin == kLeftToRightEmbeddingMark || 352 begin == kRightToLeftEmbeddingMark || 353 begin == kLeftToRightOverride || 354 begin == kRightToLeftOverride) 355 ++begin_index; 356 size_t end_index = text.length() - 1; 357 if (text[end_index] == kPopDirectionalFormatting) 358 --end_index; 359 return text.substr(begin_index, end_index - begin_index + 1); 360 } 361 362 } // namespace i18n 363 } // namespace base 364