1 // Copyright 2018 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_INTL_SUPPORT 6 #error Internationalization is expected to be enabled. 7 #endif // V8_INTL_SUPPORT 8 9 #include "src/objects/js-list-format.h" 10 11 #include <memory> 12 #include <vector> 13 14 #include "src/elements.h" 15 #include "src/heap/factory.h" 16 #include "src/isolate.h" 17 #include "src/objects-inl.h" 18 #include "src/objects/intl-objects.h" 19 #include "src/objects/js-array-inl.h" 20 #include "src/objects/js-list-format-inl.h" 21 #include "src/objects/managed.h" 22 #include "unicode/listformatter.h" 23 24 namespace v8 { 25 namespace internal { 26 27 namespace { 28 const char* kStandard = "standard"; 29 const char* kOr = "or"; 30 const char* kUnit = "unit"; 31 const char* kStandardShort = "standard-short"; 32 const char* kUnitShort = "unit-short"; 33 const char* kUnitNarrow = "unit-narrow"; 34 35 const char* GetIcuStyleString(JSListFormat::Style style, 36 JSListFormat::Type type) { 37 switch (type) { 38 case JSListFormat::Type::CONJUNCTION: 39 switch (style) { 40 case JSListFormat::Style::LONG: 41 return kStandard; 42 case JSListFormat::Style::SHORT: 43 return kStandardShort; 44 case JSListFormat::Style::NARROW: 45 // Currently, ListFormat::createInstance on "standard-narrow" will 46 // fail so we use "standard-short" here. 47 // See https://unicode.org/cldr/trac/ticket/11254 48 // TODO(ftang): change to return kStandardNarrow; after the above 49 // issue fixed in CLDR/ICU. 50 // CLDR bug: https://unicode.org/cldr/trac/ticket/11254 51 // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014 52 return kStandardShort; 53 case JSListFormat::Style::COUNT: 54 UNREACHABLE(); 55 } 56 case JSListFormat::Type::DISJUNCTION: 57 switch (style) { 58 // Currently, ListFormat::createInstance on "or-short" and "or-narrow" 59 // will fail so we use "or" here. 60 // See https://unicode.org/cldr/trac/ticket/11254 61 // TODO(ftang): change to return kOr, kOrShort or kOrNarrow depend on 62 // style after the above issue fixed in CLDR/ICU. 63 // CLDR bug: https://unicode.org/cldr/trac/ticket/11254 64 // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014 65 case JSListFormat::Style::LONG: 66 case JSListFormat::Style::SHORT: 67 case JSListFormat::Style::NARROW: 68 return kOr; 69 case JSListFormat::Style::COUNT: 70 UNREACHABLE(); 71 } 72 case JSListFormat::Type::UNIT: 73 switch (style) { 74 case JSListFormat::Style::LONG: 75 return kUnit; 76 case JSListFormat::Style::SHORT: 77 return kUnitShort; 78 case JSListFormat::Style::NARROW: 79 return kUnitNarrow; 80 case JSListFormat::Style::COUNT: 81 UNREACHABLE(); 82 } 83 case JSListFormat::Type::COUNT: 84 UNREACHABLE(); 85 } 86 } 87 88 } // namespace 89 90 JSListFormat::Style get_style(const char* str) { 91 switch (str[0]) { 92 case 'n': 93 if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW; 94 break; 95 case 'l': 96 if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG; 97 break; 98 case 's': 99 if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT; 100 break; 101 } 102 UNREACHABLE(); 103 } 104 105 JSListFormat::Type get_type(const char* str) { 106 switch (str[0]) { 107 case 'c': 108 if (strcmp(&str[1], "onjunction") == 0) 109 return JSListFormat::Type::CONJUNCTION; 110 break; 111 case 'd': 112 if (strcmp(&str[1], "isjunction") == 0) 113 return JSListFormat::Type::DISJUNCTION; 114 break; 115 case 'u': 116 if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT; 117 break; 118 } 119 UNREACHABLE(); 120 } 121 122 MaybeHandle<JSListFormat> JSListFormat::InitializeListFormat( 123 Isolate* isolate, Handle<JSListFormat> list_format_holder, 124 Handle<Object> input_locales, Handle<Object> input_options) { 125 Factory* factory = isolate->factory(); 126 list_format_holder->set_flags(0); 127 128 Handle<JSReceiver> options; 129 // 2. If options is undefined, then 130 if (input_options->IsUndefined(isolate)) { 131 // a. Let options be ObjectCreate(null). 132 options = isolate->factory()->NewJSObjectWithNullProto(); 133 // 3. Else 134 } else { 135 // a. Let options be ? ToObject(options). 136 ASSIGN_RETURN_ON_EXCEPTION(isolate, options, 137 Object::ToObject(isolate, input_options), 138 JSListFormat); 139 } 140 141 // 5. Let t be GetOption(options, "type", "string", "conjunction", 142 // "disjunction", "unit", "conjunction"). 143 std::unique_ptr<char[]> type_str = nullptr; 144 std::vector<const char*> type_values = {"conjunction", "disjunction", "unit"}; 145 Maybe<bool> maybe_found_type = Intl::GetStringOption( 146 isolate, options, "type", type_values, "Intl.ListFormat", &type_str); 147 Type type_enum = Type::CONJUNCTION; 148 MAYBE_RETURN(maybe_found_type, MaybeHandle<JSListFormat>()); 149 if (maybe_found_type.FromJust()) { 150 DCHECK_NOT_NULL(type_str.get()); 151 type_enum = get_type(type_str.get()); 152 } 153 // 6. Set listFormat.[[Type]] to t. 154 list_format_holder->set_type(type_enum); 155 156 // 7. Let s be ? GetOption(options, "style", "string", 157 // "long", "short", "narrow", "long"). 158 std::unique_ptr<char[]> style_str = nullptr; 159 std::vector<const char*> style_values = {"long", "short", "narrow"}; 160 Maybe<bool> maybe_found_style = Intl::GetStringOption( 161 isolate, options, "style", style_values, "Intl.ListFormat", &style_str); 162 Style style_enum = Style::LONG; 163 MAYBE_RETURN(maybe_found_style, MaybeHandle<JSListFormat>()); 164 if (maybe_found_style.FromJust()) { 165 DCHECK_NOT_NULL(style_str.get()); 166 style_enum = get_style(style_str.get()); 167 } 168 // 15. Set listFormat.[[Style]] to s. 169 list_format_holder->set_style(style_enum); 170 171 // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]], 172 // requestedLocales, opt, undefined, localeData). 173 Handle<JSObject> r; 174 ASSIGN_RETURN_ON_EXCEPTION( 175 isolate, r, 176 Intl::ResolveLocale(isolate, "listformat", input_locales, options), 177 JSListFormat); 178 179 Handle<Object> locale_obj = 180 JSObject::GetDataProperty(r, factory->locale_string()); 181 Handle<String> locale; 182 ASSIGN_RETURN_ON_EXCEPTION( 183 isolate, locale, Object::ToString(isolate, locale_obj), JSListFormat); 184 185 // 18. Set listFormat.[[Locale]] to the value of r.[[Locale]]. 186 list_format_holder->set_locale(*locale); 187 188 std::unique_ptr<char[]> locale_name = locale->ToCString(); 189 icu::Locale icu_locale(locale_name.get()); 190 UErrorCode status = U_ZERO_ERROR; 191 icu::ListFormatter* formatter = icu::ListFormatter::createInstance( 192 icu_locale, GetIcuStyleString(style_enum, type_enum), status); 193 if (U_FAILURE(status)) { 194 delete formatter; 195 FATAL("Failed to create ICU list formatter, are ICU data files missing?"); 196 } 197 CHECK_NOT_NULL(formatter); 198 199 Handle<Managed<icu::ListFormatter>> managed_formatter = 200 Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter); 201 202 list_format_holder->set_formatter(*managed_formatter); 203 return list_format_holder; 204 } 205 206 Handle<JSObject> JSListFormat::ResolvedOptions( 207 Isolate* isolate, Handle<JSListFormat> format_holder) { 208 Factory* factory = isolate->factory(); 209 Handle<JSObject> result = factory->NewJSObject(isolate->object_function()); 210 Handle<String> locale(format_holder->locale(), isolate); 211 JSObject::AddProperty(isolate, result, factory->locale_string(), locale, 212 NONE); 213 JSObject::AddProperty(isolate, result, factory->style_string(), 214 format_holder->StyleAsString(), NONE); 215 JSObject::AddProperty(isolate, result, factory->type_string(), 216 format_holder->TypeAsString(), NONE); 217 return result; 218 } 219 220 icu::ListFormatter* JSListFormat::UnpackFormatter(Isolate* isolate, 221 Handle<JSListFormat> holder) { 222 return Managed<icu::ListFormatter>::cast(holder->formatter())->raw(); 223 } 224 225 Handle<String> JSListFormat::StyleAsString() const { 226 switch (style()) { 227 case Style::LONG: 228 return GetReadOnlyRoots().long_string_handle(); 229 case Style::SHORT: 230 return GetReadOnlyRoots().short_string_handle(); 231 case Style::NARROW: 232 return GetReadOnlyRoots().narrow_string_handle(); 233 case Style::COUNT: 234 UNREACHABLE(); 235 } 236 } 237 238 Handle<String> JSListFormat::TypeAsString() const { 239 switch (type()) { 240 case Type::CONJUNCTION: 241 return GetReadOnlyRoots().conjunction_string_handle(); 242 case Type::DISJUNCTION: 243 return GetReadOnlyRoots().disjunction_string_handle(); 244 case Type::UNIT: 245 return GetReadOnlyRoots().unit_string_handle(); 246 case Type::COUNT: 247 UNREACHABLE(); 248 } 249 } 250 251 namespace { 252 253 // TODO(ftang) remove the following hack after icu::ListFormat support 254 // FieldPosition. 255 // This is a temporary workaround until icu::ListFormat support FieldPosition 256 // It is inefficient and won't work correctly on the edge case that the input 257 // contains fraction of the list pattern. 258 // For example the following under English will mark the "an" incorrectly 259 // since the formatted is "a, b, and an". 260 // listFormat.formatToParts(["a", "b", "an"]) 261 // https://ssl.icu-project.org/trac/ticket/13754 262 MaybeHandle<JSArray> GenerateListFormatParts( 263 Isolate* isolate, const icu::UnicodeString& formatted, 264 const icu::UnicodeString items[], int length) { 265 Factory* factory = isolate->factory(); 266 int estimate_size = length * 2 + 1; 267 Handle<JSArray> array = factory->NewJSArray(estimate_size); 268 int index = 0; 269 int last_pos = 0; 270 for (int i = 0; i < length; i++) { 271 int found = formatted.indexOf(items[i], last_pos); 272 DCHECK_GE(found, 0); 273 if (found > last_pos) { 274 Handle<String> substring; 275 ASSIGN_RETURN_ON_EXCEPTION( 276 isolate, substring, 277 Intl::ToString(isolate, formatted, last_pos, found), JSArray); 278 Intl::AddElement(isolate, array, index++, factory->literal_string(), 279 substring); 280 } 281 last_pos = found + items[i].length(); 282 Handle<String> substring; 283 ASSIGN_RETURN_ON_EXCEPTION( 284 isolate, substring, Intl::ToString(isolate, formatted, found, last_pos), 285 JSArray); 286 Intl::AddElement(isolate, array, index++, factory->element_string(), 287 substring); 288 } 289 if (last_pos < formatted.length()) { 290 Handle<String> substring; 291 ASSIGN_RETURN_ON_EXCEPTION( 292 isolate, substring, 293 Intl::ToString(isolate, formatted, last_pos, formatted.length()), 294 JSArray); 295 Intl::AddElement(isolate, array, index++, factory->literal_string(), 296 substring); 297 } 298 return array; 299 } 300 301 // Extract String from JSArray into array of UnicodeString 302 Maybe<bool> ToUnicodeStringArray(Isolate* isolate, Handle<JSArray> array, 303 icu::UnicodeString items[], uint32_t length) { 304 Factory* factory = isolate->factory(); 305 // In general, ElementsAccessor::Get actually isn't guaranteed to give us the 306 // elements in order. But given that it was created by a builtin we control, 307 // it shouldn't be possible for it to be problematic. Add DCHECK to ensure 308 // that. 309 DCHECK(array->HasFastPackedElements()); 310 auto* accessor = array->GetElementsAccessor(); 311 DCHECK(length == accessor->NumberOfElements(*array)); 312 // ecma402 #sec-createpartsfromlist 313 // 2. If list contains any element value such that Type(value) is not String, 314 // throw a TypeError exception. 315 // 316 // Per spec it looks like we're supposed to throw a TypeError exception if the 317 // item isn't already a string, rather than coercing to a string. Moreover, 318 // the way the spec's written it looks like we're supposed to run through the 319 // whole list to check that they're all strings before going further. 320 for (uint32_t i = 0; i < length; i++) { 321 Handle<Object> item = accessor->Get(array, i); 322 DCHECK(!item.is_null()); 323 if (!item->IsString()) { 324 THROW_NEW_ERROR_RETURN_VALUE( 325 isolate, 326 NewTypeError(MessageTemplate::kArrayItemNotType, 327 factory->NewStringFromStaticChars("list"), 328 factory->NewNumber(i), 329 factory->NewStringFromStaticChars("String")), 330 Nothing<bool>()); 331 } 332 } 333 for (uint32_t i = 0; i < length; i++) { 334 Handle<String> string = Handle<String>::cast(accessor->Get(array, i)); 335 DisallowHeapAllocation no_gc; 336 string = String::Flatten(isolate, string); 337 std::unique_ptr<uc16[]> sap; 338 items[i] = 339 icu::UnicodeString(GetUCharBufferFromFlat(string->GetFlatContent(), 340 &sap, string->length()), 341 string->length()); 342 } 343 return Just(true); 344 } 345 346 } // namespace 347 348 Maybe<bool> FormatListCommon(Isolate* isolate, 349 Handle<JSListFormat> format_holder, 350 Handle<JSArray> list, 351 icu::UnicodeString& formatted, uint32_t* length, 352 std::unique_ptr<icu::UnicodeString[]>& array) { 353 DCHECK(!list->IsUndefined()); 354 355 icu::ListFormatter* formatter = 356 JSListFormat::UnpackFormatter(isolate, format_holder); 357 CHECK_NOT_NULL(formatter); 358 359 *length = list->GetElementsAccessor()->NumberOfElements(*list); 360 array.reset(new icu::UnicodeString[*length]); 361 362 // ecma402 #sec-createpartsfromlist 363 // 2. If list contains any element value such that Type(value) is not String, 364 // throw a TypeError exception. 365 MAYBE_RETURN(ToUnicodeStringArray(isolate, list, array.get(), *length), 366 Nothing<bool>()); 367 368 UErrorCode status = U_ZERO_ERROR; 369 formatter->format(array.get(), *length, formatted, status); 370 DCHECK(U_SUCCESS(status)); 371 return Just(true); 372 } 373 374 // ecma402 #sec-formatlist 375 MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate, 376 Handle<JSListFormat> format_holder, 377 Handle<JSArray> list) { 378 icu::UnicodeString formatted; 379 uint32_t length; 380 std::unique_ptr<icu::UnicodeString[]> array; 381 MAYBE_RETURN( 382 FormatListCommon(isolate, format_holder, list, formatted, &length, array), 383 Handle<String>()); 384 return Intl::ToString(isolate, formatted); 385 } 386 387 // ecma42 #sec-formatlisttoparts 388 MaybeHandle<JSArray> JSListFormat::FormatListToParts( 389 Isolate* isolate, Handle<JSListFormat> format_holder, 390 Handle<JSArray> list) { 391 icu::UnicodeString formatted; 392 uint32_t length; 393 std::unique_ptr<icu::UnicodeString[]> array; 394 MAYBE_RETURN( 395 FormatListCommon(isolate, format_holder, list, formatted, &length, array), 396 Handle<JSArray>()); 397 return GenerateListFormatParts(isolate, formatted, array.get(), length); 398 } 399 400 } // namespace internal 401 } // namespace v8 402