Home | History | Annotate | Download | only in objects
      1 // Copyright 2018 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef V8_INTL_SUPPORT
      6 #error Internationalization is expected to be enabled.
      7 #endif  // V8_INTL_SUPPORT
      8 
      9 #include "src/objects/js-list-format.h"
     10 
     11 #include <memory>
     12 #include <vector>
     13 
     14 #include "src/elements.h"
     15 #include "src/heap/factory.h"
     16 #include "src/isolate.h"
     17 #include "src/objects-inl.h"
     18 #include "src/objects/intl-objects.h"
     19 #include "src/objects/js-array-inl.h"
     20 #include "src/objects/js-list-format-inl.h"
     21 #include "src/objects/managed.h"
     22 #include "unicode/listformatter.h"
     23 
     24 namespace v8 {
     25 namespace internal {
     26 
     27 namespace {
     28 const char* kStandard = "standard";
     29 const char* kOr = "or";
     30 const char* kUnit = "unit";
     31 const char* kStandardShort = "standard-short";
     32 const char* kUnitShort = "unit-short";
     33 const char* kUnitNarrow = "unit-narrow";
     34 
     35 const char* GetIcuStyleString(JSListFormat::Style style,
     36                               JSListFormat::Type type) {
     37   switch (type) {
     38     case JSListFormat::Type::CONJUNCTION:
     39       switch (style) {
     40         case JSListFormat::Style::LONG:
     41           return kStandard;
     42         case JSListFormat::Style::SHORT:
     43           return kStandardShort;
     44         case JSListFormat::Style::NARROW:
     45           // Currently, ListFormat::createInstance on "standard-narrow" will
     46           // fail so we use "standard-short" here.
     47           // See https://unicode.org/cldr/trac/ticket/11254
     48           // TODO(ftang): change to return kStandardNarrow; after the above
     49           // issue fixed in CLDR/ICU.
     50           // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
     51           // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
     52           return kStandardShort;
     53         case JSListFormat::Style::COUNT:
     54           UNREACHABLE();
     55       }
     56     case JSListFormat::Type::DISJUNCTION:
     57       switch (style) {
     58         // Currently, ListFormat::createInstance on "or-short" and "or-narrow"
     59         // will fail so we use "or" here.
     60         // See https://unicode.org/cldr/trac/ticket/11254
     61         // TODO(ftang): change to return kOr, kOrShort or kOrNarrow depend on
     62         // style after the above issue fixed in CLDR/ICU.
     63         // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
     64         // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
     65         case JSListFormat::Style::LONG:
     66         case JSListFormat::Style::SHORT:
     67         case JSListFormat::Style::NARROW:
     68           return kOr;
     69         case JSListFormat::Style::COUNT:
     70           UNREACHABLE();
     71       }
     72     case JSListFormat::Type::UNIT:
     73       switch (style) {
     74         case JSListFormat::Style::LONG:
     75           return kUnit;
     76         case JSListFormat::Style::SHORT:
     77           return kUnitShort;
     78         case JSListFormat::Style::NARROW:
     79           return kUnitNarrow;
     80         case JSListFormat::Style::COUNT:
     81           UNREACHABLE();
     82       }
     83     case JSListFormat::Type::COUNT:
     84       UNREACHABLE();
     85   }
     86 }
     87 
     88 }  // namespace
     89 
     90 JSListFormat::Style get_style(const char* str) {
     91   switch (str[0]) {
     92     case 'n':
     93       if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
     94       break;
     95     case 'l':
     96       if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
     97       break;
     98     case 's':
     99       if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
    100       break;
    101   }
    102   UNREACHABLE();
    103 }
    104 
    105 JSListFormat::Type get_type(const char* str) {
    106   switch (str[0]) {
    107     case 'c':
    108       if (strcmp(&str[1], "onjunction") == 0)
    109         return JSListFormat::Type::CONJUNCTION;
    110       break;
    111     case 'd':
    112       if (strcmp(&str[1], "isjunction") == 0)
    113         return JSListFormat::Type::DISJUNCTION;
    114       break;
    115     case 'u':
    116       if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
    117       break;
    118   }
    119   UNREACHABLE();
    120 }
    121 
    122 MaybeHandle<JSListFormat> JSListFormat::InitializeListFormat(
    123     Isolate* isolate, Handle<JSListFormat> list_format_holder,
    124     Handle<Object> input_locales, Handle<Object> input_options) {
    125   Factory* factory = isolate->factory();
    126   list_format_holder->set_flags(0);
    127 
    128   Handle<JSReceiver> options;
    129   // 2. If options is undefined, then
    130   if (input_options->IsUndefined(isolate)) {
    131     // a. Let options be ObjectCreate(null).
    132     options = isolate->factory()->NewJSObjectWithNullProto();
    133     // 3. Else
    134   } else {
    135     // a. Let options be ? ToObject(options).
    136     ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
    137                                Object::ToObject(isolate, input_options),
    138                                JSListFormat);
    139   }
    140 
    141   // 5. Let t be GetOption(options, "type", "string", "conjunction",
    142   //    "disjunction", "unit", "conjunction").
    143   std::unique_ptr<char[]> type_str = nullptr;
    144   std::vector<const char*> type_values = {"conjunction", "disjunction", "unit"};
    145   Maybe<bool> maybe_found_type = Intl::GetStringOption(
    146       isolate, options, "type", type_values, "Intl.ListFormat", &type_str);
    147   Type type_enum = Type::CONJUNCTION;
    148   MAYBE_RETURN(maybe_found_type, MaybeHandle<JSListFormat>());
    149   if (maybe_found_type.FromJust()) {
    150     DCHECK_NOT_NULL(type_str.get());
    151     type_enum = get_type(type_str.get());
    152   }
    153   // 6. Set listFormat.[[Type]] to t.
    154   list_format_holder->set_type(type_enum);
    155 
    156   // 7. Let s be ? GetOption(options, "style", "string",
    157   //                          "long", "short", "narrow", "long").
    158   std::unique_ptr<char[]> style_str = nullptr;
    159   std::vector<const char*> style_values = {"long", "short", "narrow"};
    160   Maybe<bool> maybe_found_style = Intl::GetStringOption(
    161       isolate, options, "style", style_values, "Intl.ListFormat", &style_str);
    162   Style style_enum = Style::LONG;
    163   MAYBE_RETURN(maybe_found_style, MaybeHandle<JSListFormat>());
    164   if (maybe_found_style.FromJust()) {
    165     DCHECK_NOT_NULL(style_str.get());
    166     style_enum = get_style(style_str.get());
    167   }
    168   // 15. Set listFormat.[[Style]] to s.
    169   list_format_holder->set_style(style_enum);
    170 
    171   // 10. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
    172   // requestedLocales, opt, undefined, localeData).
    173   Handle<JSObject> r;
    174   ASSIGN_RETURN_ON_EXCEPTION(
    175       isolate, r,
    176       Intl::ResolveLocale(isolate, "listformat", input_locales, options),
    177       JSListFormat);
    178 
    179   Handle<Object> locale_obj =
    180       JSObject::GetDataProperty(r, factory->locale_string());
    181   Handle<String> locale;
    182   ASSIGN_RETURN_ON_EXCEPTION(
    183       isolate, locale, Object::ToString(isolate, locale_obj), JSListFormat);
    184 
    185   // 18. Set listFormat.[[Locale]] to the value of r.[[Locale]].
    186   list_format_holder->set_locale(*locale);
    187 
    188   std::unique_ptr<char[]> locale_name = locale->ToCString();
    189   icu::Locale icu_locale(locale_name.get());
    190   UErrorCode status = U_ZERO_ERROR;
    191   icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
    192       icu_locale, GetIcuStyleString(style_enum, type_enum), status);
    193   if (U_FAILURE(status)) {
    194     delete formatter;
    195     FATAL("Failed to create ICU list formatter, are ICU data files missing?");
    196   }
    197   CHECK_NOT_NULL(formatter);
    198 
    199   Handle<Managed<icu::ListFormatter>> managed_formatter =
    200       Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
    201 
    202   list_format_holder->set_formatter(*managed_formatter);
    203   return list_format_holder;
    204 }
    205 
    206 Handle<JSObject> JSListFormat::ResolvedOptions(
    207     Isolate* isolate, Handle<JSListFormat> format_holder) {
    208   Factory* factory = isolate->factory();
    209   Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
    210   Handle<String> locale(format_holder->locale(), isolate);
    211   JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
    212                         NONE);
    213   JSObject::AddProperty(isolate, result, factory->style_string(),
    214                         format_holder->StyleAsString(), NONE);
    215   JSObject::AddProperty(isolate, result, factory->type_string(),
    216                         format_holder->TypeAsString(), NONE);
    217   return result;
    218 }
    219 
    220 icu::ListFormatter* JSListFormat::UnpackFormatter(Isolate* isolate,
    221                                                   Handle<JSListFormat> holder) {
    222   return Managed<icu::ListFormatter>::cast(holder->formatter())->raw();
    223 }
    224 
    225 Handle<String> JSListFormat::StyleAsString() const {
    226   switch (style()) {
    227     case Style::LONG:
    228       return GetReadOnlyRoots().long_string_handle();
    229     case Style::SHORT:
    230       return GetReadOnlyRoots().short_string_handle();
    231     case Style::NARROW:
    232       return GetReadOnlyRoots().narrow_string_handle();
    233     case Style::COUNT:
    234       UNREACHABLE();
    235   }
    236 }
    237 
    238 Handle<String> JSListFormat::TypeAsString() const {
    239   switch (type()) {
    240     case Type::CONJUNCTION:
    241       return GetReadOnlyRoots().conjunction_string_handle();
    242     case Type::DISJUNCTION:
    243       return GetReadOnlyRoots().disjunction_string_handle();
    244     case Type::UNIT:
    245       return GetReadOnlyRoots().unit_string_handle();
    246     case Type::COUNT:
    247       UNREACHABLE();
    248   }
    249 }
    250 
    251 namespace {
    252 
    253 // TODO(ftang) remove the following hack after icu::ListFormat support
    254 // FieldPosition.
    255 // This is a temporary workaround until icu::ListFormat support FieldPosition
    256 // It is inefficient and won't work correctly on the edge case that the input
    257 // contains fraction of the list pattern.
    258 // For example the following under English will mark the "an" incorrectly
    259 // since the formatted is "a, b, and an".
    260 // listFormat.formatToParts(["a", "b", "an"])
    261 // https://ssl.icu-project.org/trac/ticket/13754
    262 MaybeHandle<JSArray> GenerateListFormatParts(
    263     Isolate* isolate, const icu::UnicodeString& formatted,
    264     const icu::UnicodeString items[], int length) {
    265   Factory* factory = isolate->factory();
    266   int estimate_size = length * 2 + 1;
    267   Handle<JSArray> array = factory->NewJSArray(estimate_size);
    268   int index = 0;
    269   int last_pos = 0;
    270   for (int i = 0; i < length; i++) {
    271     int found = formatted.indexOf(items[i], last_pos);
    272     DCHECK_GE(found, 0);
    273     if (found > last_pos) {
    274       Handle<String> substring;
    275       ASSIGN_RETURN_ON_EXCEPTION(
    276           isolate, substring,
    277           Intl::ToString(isolate, formatted, last_pos, found), JSArray);
    278       Intl::AddElement(isolate, array, index++, factory->literal_string(),
    279                        substring);
    280     }
    281     last_pos = found + items[i].length();
    282     Handle<String> substring;
    283     ASSIGN_RETURN_ON_EXCEPTION(
    284         isolate, substring, Intl::ToString(isolate, formatted, found, last_pos),
    285         JSArray);
    286     Intl::AddElement(isolate, array, index++, factory->element_string(),
    287                      substring);
    288   }
    289   if (last_pos < formatted.length()) {
    290     Handle<String> substring;
    291     ASSIGN_RETURN_ON_EXCEPTION(
    292         isolate, substring,
    293         Intl::ToString(isolate, formatted, last_pos, formatted.length()),
    294         JSArray);
    295     Intl::AddElement(isolate, array, index++, factory->literal_string(),
    296                      substring);
    297   }
    298   return array;
    299 }
    300 
    301 // Extract String from JSArray into array of UnicodeString
    302 Maybe<bool> ToUnicodeStringArray(Isolate* isolate, Handle<JSArray> array,
    303                                  icu::UnicodeString items[], uint32_t length) {
    304   Factory* factory = isolate->factory();
    305   // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
    306   // elements in order. But given that it was created by a builtin we control,
    307   // it shouldn't be possible for it to be problematic. Add DCHECK to ensure
    308   // that.
    309   DCHECK(array->HasFastPackedElements());
    310   auto* accessor = array->GetElementsAccessor();
    311   DCHECK(length == accessor->NumberOfElements(*array));
    312   // ecma402 #sec-createpartsfromlist
    313   // 2. If list contains any element value such that Type(value) is not String,
    314   // throw a TypeError exception.
    315   //
    316   // Per spec it looks like we're supposed to throw a TypeError exception if the
    317   // item isn't already a string, rather than coercing to a string. Moreover,
    318   // the way the spec's written it looks like we're supposed to run through the
    319   // whole list to check that they're all strings before going further.
    320   for (uint32_t i = 0; i < length; i++) {
    321     Handle<Object> item = accessor->Get(array, i);
    322     DCHECK(!item.is_null());
    323     if (!item->IsString()) {
    324       THROW_NEW_ERROR_RETURN_VALUE(
    325           isolate,
    326           NewTypeError(MessageTemplate::kArrayItemNotType,
    327                        factory->NewStringFromStaticChars("list"),
    328                        factory->NewNumber(i),
    329                        factory->NewStringFromStaticChars("String")),
    330           Nothing<bool>());
    331     }
    332   }
    333   for (uint32_t i = 0; i < length; i++) {
    334     Handle<String> string = Handle<String>::cast(accessor->Get(array, i));
    335     DisallowHeapAllocation no_gc;
    336     string = String::Flatten(isolate, string);
    337     std::unique_ptr<uc16[]> sap;
    338     items[i] =
    339         icu::UnicodeString(GetUCharBufferFromFlat(string->GetFlatContent(),
    340                                                   &sap, string->length()),
    341                            string->length());
    342   }
    343   return Just(true);
    344 }
    345 
    346 }  // namespace
    347 
    348 Maybe<bool> FormatListCommon(Isolate* isolate,
    349                              Handle<JSListFormat> format_holder,
    350                              Handle<JSArray> list,
    351                              icu::UnicodeString& formatted, uint32_t* length,
    352                              std::unique_ptr<icu::UnicodeString[]>& array) {
    353   DCHECK(!list->IsUndefined());
    354 
    355   icu::ListFormatter* formatter =
    356       JSListFormat::UnpackFormatter(isolate, format_holder);
    357   CHECK_NOT_NULL(formatter);
    358 
    359   *length = list->GetElementsAccessor()->NumberOfElements(*list);
    360   array.reset(new icu::UnicodeString[*length]);
    361 
    362   // ecma402 #sec-createpartsfromlist
    363   // 2. If list contains any element value such that Type(value) is not String,
    364   // throw a TypeError exception.
    365   MAYBE_RETURN(ToUnicodeStringArray(isolate, list, array.get(), *length),
    366                Nothing<bool>());
    367 
    368   UErrorCode status = U_ZERO_ERROR;
    369   formatter->format(array.get(), *length, formatted, status);
    370   DCHECK(U_SUCCESS(status));
    371   return Just(true);
    372 }
    373 
    374 // ecma402 #sec-formatlist
    375 MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
    376                                              Handle<JSListFormat> format_holder,
    377                                              Handle<JSArray> list) {
    378   icu::UnicodeString formatted;
    379   uint32_t length;
    380   std::unique_ptr<icu::UnicodeString[]> array;
    381   MAYBE_RETURN(
    382       FormatListCommon(isolate, format_holder, list, formatted, &length, array),
    383       Handle<String>());
    384   return Intl::ToString(isolate, formatted);
    385 }
    386 
    387 // ecma42 #sec-formatlisttoparts
    388 MaybeHandle<JSArray> JSListFormat::FormatListToParts(
    389     Isolate* isolate, Handle<JSListFormat> format_holder,
    390     Handle<JSArray> list) {
    391   icu::UnicodeString formatted;
    392   uint32_t length;
    393   std::unique_ptr<icu::UnicodeString[]> array;
    394   MAYBE_RETURN(
    395       FormatListCommon(isolate, format_holder, list, formatted, &length, array),
    396       Handle<JSArray>());
    397   return GenerateListFormatParts(isolate, formatted, array.get(), length);
    398 }
    399 
    400 }  // namespace internal
    401 }  // namespace v8
    402