1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 6 #ifdef V8_I18N_SUPPORT 7 #include "src/runtime/runtime-utils.h" 8 9 #include <memory> 10 11 #include "src/api-natives.h" 12 #include "src/api.h" 13 #include "src/arguments.h" 14 #include "src/factory.h" 15 #include "src/i18n.h" 16 #include "src/isolate-inl.h" 17 #include "src/messages.h" 18 #include "src/string-case.h" 19 #include "src/utils.h" 20 21 #include "unicode/brkiter.h" 22 #include "unicode/calendar.h" 23 #include "unicode/coll.h" 24 #include "unicode/curramt.h" 25 #include "unicode/datefmt.h" 26 #include "unicode/dcfmtsym.h" 27 #include "unicode/decimfmt.h" 28 #include "unicode/dtfmtsym.h" 29 #include "unicode/dtptngen.h" 30 #include "unicode/fieldpos.h" 31 #include "unicode/fpositer.h" 32 #include "unicode/locid.h" 33 #include "unicode/normalizer2.h" 34 #include "unicode/numfmt.h" 35 #include "unicode/numsys.h" 36 #include "unicode/rbbi.h" 37 #include "unicode/smpdtfmt.h" 38 #include "unicode/timezone.h" 39 #include "unicode/translit.h" 40 #include "unicode/uchar.h" 41 #include "unicode/ucol.h" 42 #include "unicode/ucurr.h" 43 #include "unicode/uloc.h" 44 #include "unicode/unistr.h" 45 #include "unicode/unum.h" 46 #include "unicode/ustring.h" 47 #include "unicode/uversion.h" 48 49 50 namespace v8 { 51 namespace internal { 52 namespace { 53 54 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat, 55 std::unique_ptr<uc16[]>* dest, 56 int32_t length) { 57 DCHECK(flat.IsFlat()); 58 if (flat.IsOneByte()) { 59 if (!*dest) { 60 dest->reset(NewArray<uc16>(length)); 61 CopyChars(dest->get(), flat.ToOneByteVector().start(), length); 62 } 63 return reinterpret_cast<const UChar*>(dest->get()); 64 } else { 65 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start()); 66 } 67 } 68 69 } // namespace 70 71 // ECMA 402 6.2.3 72 RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) { 73 HandleScope scope(isolate); 74 Factory* factory = isolate->factory(); 75 76 DCHECK_EQ(1, args.length()); 77 CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0); 78 79 v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str)); 80 81 // Return value which denotes invalid language tag. 82 // TODO(jshin): Can uloc_{for,to}TanguageTag fail even for structually valid 83 // language tags? If not, just add CHECK instead of returning 'invalid-tag'. 84 const char* const kInvalidTag = "invalid-tag"; 85 86 UErrorCode error = U_ZERO_ERROR; 87 char icu_result[ULOC_FULLNAME_CAPACITY]; 88 int icu_length = 0; 89 90 uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY, 91 &icu_length, &error); 92 if (U_FAILURE(error) || icu_length == 0) { 93 return *factory->NewStringFromAsciiChecked(kInvalidTag); 94 } 95 96 char result[ULOC_FULLNAME_CAPACITY]; 97 98 // Force strict BCP47 rules. 99 uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error); 100 101 if (U_FAILURE(error)) { 102 return *factory->NewStringFromAsciiChecked(kInvalidTag); 103 } 104 105 return *factory->NewStringFromAsciiChecked(result); 106 } 107 108 109 RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) { 110 HandleScope scope(isolate); 111 Factory* factory = isolate->factory(); 112 113 DCHECK_EQ(1, args.length()); 114 CONVERT_ARG_HANDLE_CHECKED(String, service, 0); 115 116 const icu::Locale* available_locales = NULL; 117 int32_t count = 0; 118 119 if (service->IsUtf8EqualTo(CStrVector("collator"))) { 120 available_locales = icu::Collator::getAvailableLocales(count); 121 } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) { 122 available_locales = icu::NumberFormat::getAvailableLocales(count); 123 } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) { 124 available_locales = icu::DateFormat::getAvailableLocales(count); 125 } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) { 126 available_locales = icu::BreakIterator::getAvailableLocales(count); 127 } 128 129 UErrorCode error = U_ZERO_ERROR; 130 char result[ULOC_FULLNAME_CAPACITY]; 131 Handle<JSObject> locales = factory->NewJSObject(isolate->object_function()); 132 133 for (int32_t i = 0; i < count; ++i) { 134 const char* icu_name = available_locales[i].getName(); 135 136 error = U_ZERO_ERROR; 137 // No need to force strict BCP47 rules. 138 uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error); 139 if (U_FAILURE(error)) { 140 // This shouldn't happen, but lets not break the user. 141 continue; 142 } 143 144 RETURN_FAILURE_ON_EXCEPTION( 145 isolate, JSObject::SetOwnPropertyIgnoreAttributes( 146 locales, factory->NewStringFromAsciiChecked(result), 147 factory->NewNumber(i), NONE)); 148 } 149 150 return *locales; 151 } 152 153 154 RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) { 155 HandleScope scope(isolate); 156 Factory* factory = isolate->factory(); 157 158 DCHECK_EQ(0, args.length()); 159 160 icu::Locale default_locale; 161 162 // Set the locale 163 char result[ULOC_FULLNAME_CAPACITY]; 164 UErrorCode status = U_ZERO_ERROR; 165 uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY, 166 FALSE, &status); 167 if (U_SUCCESS(status)) { 168 return *factory->NewStringFromAsciiChecked(result); 169 } 170 171 return *factory->NewStringFromStaticChars("und"); 172 } 173 174 175 RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) { 176 HandleScope scope(isolate); 177 Factory* factory = isolate->factory(); 178 179 DCHECK_EQ(1, args.length()); 180 181 CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0); 182 183 uint32_t length = static_cast<uint32_t>(input->length()->Number()); 184 // Set some limit to prevent fuzz tests from going OOM. 185 // Can be bumped when callers' requirements change. 186 if (length >= 100) return isolate->ThrowIllegalOperation(); 187 Handle<FixedArray> output = factory->NewFixedArray(length); 188 Handle<Name> maximized = factory->NewStringFromStaticChars("maximized"); 189 Handle<Name> base = factory->NewStringFromStaticChars("base"); 190 for (unsigned int i = 0; i < length; ++i) { 191 Handle<Object> locale_id; 192 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 193 isolate, locale_id, JSReceiver::GetElement(isolate, input, i)); 194 if (!locale_id->IsString()) { 195 return isolate->Throw(*factory->illegal_argument_string()); 196 } 197 198 v8::String::Utf8Value utf8_locale_id( 199 v8::Utils::ToLocal(Handle<String>::cast(locale_id))); 200 201 UErrorCode error = U_ZERO_ERROR; 202 203 // Convert from BCP47 to ICU format. 204 // de-DE-u-co-phonebk -> de_DE@collation=phonebook 205 char icu_locale[ULOC_FULLNAME_CAPACITY]; 206 int icu_locale_length = 0; 207 uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY, 208 &icu_locale_length, &error); 209 if (U_FAILURE(error) || icu_locale_length == 0) { 210 return isolate->Throw(*factory->illegal_argument_string()); 211 } 212 213 // Maximize the locale. 214 // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook 215 char icu_max_locale[ULOC_FULLNAME_CAPACITY]; 216 uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY, 217 &error); 218 219 // Remove extensions from maximized locale. 220 // de_Latn_DE@collation=phonebook -> de_Latn_DE 221 char icu_base_max_locale[ULOC_FULLNAME_CAPACITY]; 222 uloc_getBaseName(icu_max_locale, icu_base_max_locale, 223 ULOC_FULLNAME_CAPACITY, &error); 224 225 // Get original name without extensions. 226 // de_DE@collation=phonebook -> de_DE 227 char icu_base_locale[ULOC_FULLNAME_CAPACITY]; 228 uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY, 229 &error); 230 231 // Convert from ICU locale format to BCP47 format. 232 // de_Latn_DE -> de-Latn-DE 233 char base_max_locale[ULOC_FULLNAME_CAPACITY]; 234 uloc_toLanguageTag(icu_base_max_locale, base_max_locale, 235 ULOC_FULLNAME_CAPACITY, FALSE, &error); 236 237 // de_DE -> de-DE 238 char base_locale[ULOC_FULLNAME_CAPACITY]; 239 uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY, 240 FALSE, &error); 241 242 if (U_FAILURE(error)) { 243 return isolate->Throw(*factory->illegal_argument_string()); 244 } 245 246 Handle<JSObject> result = factory->NewJSObject(isolate->object_function()); 247 Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale); 248 JSObject::AddProperty(result, maximized, value, NONE); 249 value = factory->NewStringFromAsciiChecked(base_locale); 250 JSObject::AddProperty(result, base, value, NONE); 251 output->set(i, *result); 252 } 253 254 Handle<JSArray> result = factory->NewJSArrayWithElements(output); 255 result->set_length(Smi::FromInt(length)); 256 return *result; 257 } 258 259 260 RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) { 261 HandleScope scope(isolate); 262 263 DCHECK_EQ(1, args.length()); 264 265 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0); 266 267 if (!input->IsJSObject()) return isolate->heap()->false_value(); 268 Handle<JSObject> obj = Handle<JSObject>::cast(input); 269 270 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol(); 271 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker); 272 return isolate->heap()->ToBoolean(!tag->IsUndefined(isolate)); 273 } 274 275 276 RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) { 277 HandleScope scope(isolate); 278 279 DCHECK_EQ(2, args.length()); 280 281 CONVERT_ARG_HANDLE_CHECKED(Object, input, 0); 282 CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1); 283 284 if (!input->IsJSObject()) return isolate->heap()->false_value(); 285 Handle<JSObject> obj = Handle<JSObject>::cast(input); 286 287 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol(); 288 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker); 289 return isolate->heap()->ToBoolean(tag->IsString() && 290 String::cast(*tag)->Equals(*expected_type)); 291 } 292 293 294 RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) { 295 HandleScope scope(isolate); 296 297 DCHECK_EQ(2, args.length()); 298 299 CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0); 300 CONVERT_ARG_HANDLE_CHECKED(String, type, 1); 301 302 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol(); 303 JSObject::SetProperty(input, marker, type, STRICT).Assert(); 304 305 return isolate->heap()->undefined_value(); 306 } 307 308 309 RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) { 310 HandleScope scope(isolate); 311 312 DCHECK_EQ(3, args.length()); 313 314 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); 315 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); 316 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); 317 318 Handle<JSFunction> constructor( 319 isolate->native_context()->intl_date_time_format_function()); 320 321 Handle<JSObject> local_object; 322 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object, 323 JSObject::New(constructor, constructor)); 324 325 // Set date time formatter as internal field of the resulting JS object. 326 icu::SimpleDateFormat* date_format = 327 DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved); 328 329 if (!date_format) return isolate->ThrowIllegalOperation(); 330 331 local_object->SetInternalField(0, reinterpret_cast<Smi*>(date_format)); 332 333 // Make object handle weak so we can delete the data format once GC kicks in. 334 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); 335 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), 336 DateFormat::DeleteDateFormat, 337 WeakCallbackType::kInternalFields); 338 return *local_object; 339 } 340 341 342 RUNTIME_FUNCTION(Runtime_InternalDateFormat) { 343 HandleScope scope(isolate); 344 345 DCHECK_EQ(2, args.length()); 346 347 CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0); 348 CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1); 349 350 Handle<Object> value; 351 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(date)); 352 353 icu::SimpleDateFormat* date_format = 354 DateFormat::UnpackDateFormat(isolate, date_format_holder); 355 CHECK_NOT_NULL(date_format); 356 357 icu::UnicodeString result; 358 date_format->format(value->Number(), result); 359 360 RETURN_RESULT_OR_FAILURE( 361 isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( 362 reinterpret_cast<const uint16_t*>(result.getBuffer()), 363 result.length()))); 364 } 365 366 namespace { 367 // The list comes from third_party/icu/source/i18n/unicode/udat.h. 368 // They're mapped to DateTimeFormat components listed at 369 // https://tc39.github.io/ecma402/#sec-datetimeformat-abstracts . 370 371 Handle<String> IcuDateFieldIdToDateType(int32_t field_id, Isolate* isolate) { 372 switch (field_id) { 373 case -1: 374 return isolate->factory()->literal_string(); 375 case UDAT_YEAR_FIELD: 376 case UDAT_EXTENDED_YEAR_FIELD: 377 case UDAT_YEAR_NAME_FIELD: 378 return isolate->factory()->year_string(); 379 case UDAT_MONTH_FIELD: 380 case UDAT_STANDALONE_MONTH_FIELD: 381 return isolate->factory()->month_string(); 382 case UDAT_DATE_FIELD: 383 return isolate->factory()->day_string(); 384 case UDAT_HOUR_OF_DAY1_FIELD: 385 case UDAT_HOUR_OF_DAY0_FIELD: 386 case UDAT_HOUR1_FIELD: 387 case UDAT_HOUR0_FIELD: 388 return isolate->factory()->hour_string(); 389 case UDAT_MINUTE_FIELD: 390 return isolate->factory()->minute_string(); 391 case UDAT_SECOND_FIELD: 392 return isolate->factory()->second_string(); 393 case UDAT_DAY_OF_WEEK_FIELD: 394 case UDAT_DOW_LOCAL_FIELD: 395 case UDAT_STANDALONE_DAY_FIELD: 396 return isolate->factory()->weekday_string(); 397 case UDAT_AM_PM_FIELD: 398 return isolate->factory()->dayperiod_string(); 399 case UDAT_TIMEZONE_FIELD: 400 case UDAT_TIMEZONE_RFC_FIELD: 401 case UDAT_TIMEZONE_GENERIC_FIELD: 402 case UDAT_TIMEZONE_SPECIAL_FIELD: 403 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: 404 case UDAT_TIMEZONE_ISO_FIELD: 405 case UDAT_TIMEZONE_ISO_LOCAL_FIELD: 406 return isolate->factory()->timeZoneName_string(); 407 case UDAT_ERA_FIELD: 408 return isolate->factory()->era_string(); 409 default: 410 // Other UDAT_*_FIELD's cannot show up because there is no way to specify 411 // them via options of Intl.DateTimeFormat. 412 UNREACHABLE(); 413 // To prevent MSVC from issuing C4715 warning. 414 return Handle<String>(); 415 } 416 } 417 418 bool AddElement(Handle<JSArray> array, int index, int32_t field_id, 419 const icu::UnicodeString& formatted, int32_t begin, int32_t end, 420 Isolate* isolate) { 421 HandleScope scope(isolate); 422 Factory* factory = isolate->factory(); 423 Handle<JSObject> element = factory->NewJSObject(isolate->object_function()); 424 Handle<String> value = IcuDateFieldIdToDateType(field_id, isolate); 425 JSObject::AddProperty(element, factory->type_string(), value, NONE); 426 427 icu::UnicodeString field(formatted.tempSubStringBetween(begin, end)); 428 ASSIGN_RETURN_ON_EXCEPTION_VALUE( 429 isolate, value, factory->NewStringFromTwoByte(Vector<const uint16_t>( 430 reinterpret_cast<const uint16_t*>(field.getBuffer()), 431 field.length())), 432 false); 433 434 JSObject::AddProperty(element, factory->value_string(), value, NONE); 435 RETURN_ON_EXCEPTION_VALUE( 436 isolate, JSObject::AddDataElement(array, index, element, NONE), false); 437 return true; 438 } 439 440 } // namespace 441 442 RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) { 443 HandleScope scope(isolate); 444 Factory* factory = isolate->factory(); 445 446 DCHECK_EQ(2, args.length()); 447 448 CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0); 449 CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1); 450 451 Handle<Object> value; 452 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(date)); 453 454 icu::SimpleDateFormat* date_format = 455 DateFormat::UnpackDateFormat(isolate, date_format_holder); 456 CHECK_NOT_NULL(date_format); 457 458 icu::UnicodeString formatted; 459 icu::FieldPositionIterator fp_iter; 460 icu::FieldPosition fp; 461 UErrorCode status = U_ZERO_ERROR; 462 date_format->format(value->Number(), formatted, &fp_iter, status); 463 if (U_FAILURE(status)) return isolate->heap()->undefined_value(); 464 465 Handle<JSArray> result = factory->NewJSArray(0); 466 int32_t length = formatted.length(); 467 if (length == 0) return *result; 468 469 int index = 0; 470 int32_t previous_end_pos = 0; 471 while (fp_iter.next(fp)) { 472 int32_t begin_pos = fp.getBeginIndex(); 473 int32_t end_pos = fp.getEndIndex(); 474 475 if (previous_end_pos < begin_pos) { 476 if (!AddElement(result, index, -1, formatted, previous_end_pos, begin_pos, 477 isolate)) { 478 return isolate->heap()->undefined_value(); 479 } 480 ++index; 481 } 482 if (!AddElement(result, index, fp.getField(), formatted, begin_pos, end_pos, 483 isolate)) { 484 return isolate->heap()->undefined_value(); 485 } 486 previous_end_pos = end_pos; 487 ++index; 488 } 489 if (previous_end_pos < length) { 490 if (!AddElement(result, index, -1, formatted, previous_end_pos, length, 491 isolate)) { 492 return isolate->heap()->undefined_value(); 493 } 494 } 495 JSObject::ValidateElements(result); 496 return *result; 497 } 498 499 RUNTIME_FUNCTION(Runtime_CreateNumberFormat) { 500 HandleScope scope(isolate); 501 502 DCHECK_EQ(3, args.length()); 503 504 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); 505 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); 506 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); 507 508 Handle<JSFunction> constructor( 509 isolate->native_context()->intl_number_format_function()); 510 511 Handle<JSObject> local_object; 512 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object, 513 JSObject::New(constructor, constructor)); 514 515 // Set number formatter as internal field of the resulting JS object. 516 icu::DecimalFormat* number_format = 517 NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved); 518 519 if (!number_format) return isolate->ThrowIllegalOperation(); 520 521 local_object->SetInternalField(0, reinterpret_cast<Smi*>(number_format)); 522 523 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); 524 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), 525 NumberFormat::DeleteNumberFormat, 526 WeakCallbackType::kInternalFields); 527 return *local_object; 528 } 529 530 531 RUNTIME_FUNCTION(Runtime_InternalNumberFormat) { 532 HandleScope scope(isolate); 533 534 DCHECK_EQ(2, args.length()); 535 536 CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0); 537 CONVERT_ARG_HANDLE_CHECKED(Object, number, 1); 538 539 Handle<Object> value; 540 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(number)); 541 542 icu::DecimalFormat* number_format = 543 NumberFormat::UnpackNumberFormat(isolate, number_format_holder); 544 CHECK_NOT_NULL(number_format); 545 546 icu::UnicodeString result; 547 number_format->format(value->Number(), result); 548 549 RETURN_RESULT_OR_FAILURE( 550 isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( 551 reinterpret_cast<const uint16_t*>(result.getBuffer()), 552 result.length()))); 553 } 554 555 556 RUNTIME_FUNCTION(Runtime_CreateCollator) { 557 HandleScope scope(isolate); 558 559 DCHECK_EQ(3, args.length()); 560 561 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); 562 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); 563 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); 564 565 Handle<JSFunction> constructor( 566 isolate->native_context()->intl_collator_function()); 567 568 Handle<JSObject> local_object; 569 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object, 570 JSObject::New(constructor, constructor)); 571 572 // Set collator as internal field of the resulting JS object. 573 icu::Collator* collator = 574 Collator::InitializeCollator(isolate, locale, options, resolved); 575 576 if (!collator) return isolate->ThrowIllegalOperation(); 577 578 local_object->SetInternalField(0, reinterpret_cast<Smi*>(collator)); 579 580 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); 581 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), 582 Collator::DeleteCollator, 583 WeakCallbackType::kInternalFields); 584 return *local_object; 585 } 586 587 588 RUNTIME_FUNCTION(Runtime_InternalCompare) { 589 HandleScope scope(isolate); 590 591 DCHECK_EQ(3, args.length()); 592 593 CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0); 594 CONVERT_ARG_HANDLE_CHECKED(String, string1, 1); 595 CONVERT_ARG_HANDLE_CHECKED(String, string2, 2); 596 597 icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder); 598 CHECK_NOT_NULL(collator); 599 600 string1 = String::Flatten(string1); 601 string2 = String::Flatten(string2); 602 603 UCollationResult result; 604 UErrorCode status = U_ZERO_ERROR; 605 { 606 DisallowHeapAllocation no_gc; 607 int32_t length1 = string1->length(); 608 int32_t length2 = string2->length(); 609 String::FlatContent flat1 = string1->GetFlatContent(); 610 String::FlatContent flat2 = string2->GetFlatContent(); 611 std::unique_ptr<uc16[]> sap1; 612 std::unique_ptr<uc16[]> sap2; 613 icu::UnicodeString string_val1( 614 FALSE, GetUCharBufferFromFlat(flat1, &sap1, length1), length1); 615 icu::UnicodeString string_val2( 616 FALSE, GetUCharBufferFromFlat(flat2, &sap2, length2), length2); 617 result = collator->compare(string_val1, string_val2, status); 618 } 619 if (U_FAILURE(status)) return isolate->ThrowIllegalOperation(); 620 621 return *isolate->factory()->NewNumberFromInt(result); 622 } 623 624 625 RUNTIME_FUNCTION(Runtime_StringNormalize) { 626 HandleScope scope(isolate); 627 static const struct { 628 const char* name; 629 UNormalization2Mode mode; 630 } normalizationForms[] = { 631 {"nfc", UNORM2_COMPOSE}, 632 {"nfc", UNORM2_DECOMPOSE}, 633 {"nfkc", UNORM2_COMPOSE}, 634 {"nfkc", UNORM2_DECOMPOSE}, 635 }; 636 637 DCHECK_EQ(2, args.length()); 638 639 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); 640 CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]); 641 CHECK(form_id >= 0 && 642 static_cast<size_t>(form_id) < arraysize(normalizationForms)); 643 644 int length = s->length(); 645 s = String::Flatten(s); 646 icu::UnicodeString result; 647 std::unique_ptr<uc16[]> sap; 648 UErrorCode status = U_ZERO_ERROR; 649 { 650 DisallowHeapAllocation no_gc; 651 String::FlatContent flat = s->GetFlatContent(); 652 const UChar* src = GetUCharBufferFromFlat(flat, &sap, length); 653 icu::UnicodeString input(false, src, length); 654 // Getting a singleton. Should not free it. 655 const icu::Normalizer2* normalizer = 656 icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name, 657 normalizationForms[form_id].mode, status); 658 DCHECK(U_SUCCESS(status)); 659 CHECK(normalizer != nullptr); 660 int32_t normalized_prefix_length = 661 normalizer->spanQuickCheckYes(input, status); 662 // Quick return if the input is already normalized. 663 if (length == normalized_prefix_length) return *s; 664 icu::UnicodeString unnormalized = 665 input.tempSubString(normalized_prefix_length); 666 // Read-only alias of the normalized prefix. 667 result.setTo(false, input.getBuffer(), normalized_prefix_length); 668 // copy-on-write; normalize the suffix and append to |result|. 669 normalizer->normalizeSecondAndAppend(result, unnormalized, status); 670 } 671 672 if (U_FAILURE(status)) { 673 return isolate->heap()->undefined_value(); 674 } 675 676 RETURN_RESULT_OR_FAILURE( 677 isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>( 678 reinterpret_cast<const uint16_t*>(result.getBuffer()), 679 result.length()))); 680 } 681 682 683 RUNTIME_FUNCTION(Runtime_CreateBreakIterator) { 684 HandleScope scope(isolate); 685 686 DCHECK_EQ(3, args.length()); 687 688 CONVERT_ARG_HANDLE_CHECKED(String, locale, 0); 689 CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1); 690 CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2); 691 692 Handle<JSFunction> constructor( 693 isolate->native_context()->intl_v8_break_iterator_function()); 694 695 Handle<JSObject> local_object; 696 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, local_object, 697 JSObject::New(constructor, constructor)); 698 699 // Set break iterator as internal field of the resulting JS object. 700 icu::BreakIterator* break_iterator = V8BreakIterator::InitializeBreakIterator( 701 isolate, locale, options, resolved); 702 703 if (!break_iterator) return isolate->ThrowIllegalOperation(); 704 705 local_object->SetInternalField(0, reinterpret_cast<Smi*>(break_iterator)); 706 // Make sure that the pointer to adopted text is NULL. 707 local_object->SetInternalField(1, static_cast<Smi*>(nullptr)); 708 709 // Make object handle weak so we can delete the break iterator once GC kicks 710 // in. 711 Handle<Object> wrapper = isolate->global_handles()->Create(*local_object); 712 GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(), 713 V8BreakIterator::DeleteBreakIterator, 714 WeakCallbackType::kInternalFields); 715 return *local_object; 716 } 717 718 719 RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) { 720 HandleScope scope(isolate); 721 722 DCHECK_EQ(2, args.length()); 723 724 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); 725 CONVERT_ARG_HANDLE_CHECKED(String, text, 1); 726 727 icu::BreakIterator* break_iterator = 728 V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); 729 CHECK_NOT_NULL(break_iterator); 730 731 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>( 732 break_iterator_holder->GetInternalField(1)); 733 delete u_text; 734 735 int length = text->length(); 736 text = String::Flatten(text); 737 DisallowHeapAllocation no_gc; 738 String::FlatContent flat = text->GetFlatContent(); 739 std::unique_ptr<uc16[]> sap; 740 const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length); 741 u_text = new icu::UnicodeString(text_value, length); 742 break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(u_text)); 743 744 break_iterator->setText(*u_text); 745 746 return isolate->heap()->undefined_value(); 747 } 748 749 750 RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) { 751 HandleScope scope(isolate); 752 753 DCHECK_EQ(1, args.length()); 754 755 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); 756 757 icu::BreakIterator* break_iterator = 758 V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); 759 CHECK_NOT_NULL(break_iterator); 760 761 return *isolate->factory()->NewNumberFromInt(break_iterator->first()); 762 } 763 764 765 RUNTIME_FUNCTION(Runtime_BreakIteratorNext) { 766 HandleScope scope(isolate); 767 768 DCHECK_EQ(1, args.length()); 769 770 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); 771 772 icu::BreakIterator* break_iterator = 773 V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); 774 CHECK_NOT_NULL(break_iterator); 775 776 return *isolate->factory()->NewNumberFromInt(break_iterator->next()); 777 } 778 779 780 RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) { 781 HandleScope scope(isolate); 782 783 DCHECK_EQ(1, args.length()); 784 785 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); 786 787 icu::BreakIterator* break_iterator = 788 V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); 789 CHECK_NOT_NULL(break_iterator); 790 791 return *isolate->factory()->NewNumberFromInt(break_iterator->current()); 792 } 793 794 795 RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) { 796 HandleScope scope(isolate); 797 798 DCHECK_EQ(1, args.length()); 799 800 CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0); 801 802 icu::BreakIterator* break_iterator = 803 V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder); 804 CHECK_NOT_NULL(break_iterator); 805 806 // TODO(cira): Remove cast once ICU fixes base BreakIterator class. 807 icu::RuleBasedBreakIterator* rule_based_iterator = 808 static_cast<icu::RuleBasedBreakIterator*>(break_iterator); 809 int32_t status = rule_based_iterator->getRuleStatus(); 810 // Keep return values in sync with JavaScript BreakType enum. 811 if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) { 812 return *isolate->factory()->NewStringFromStaticChars("none"); 813 } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) { 814 return isolate->heap()->number_string(); 815 } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) { 816 return *isolate->factory()->NewStringFromStaticChars("letter"); 817 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { 818 return *isolate->factory()->NewStringFromStaticChars("kana"); 819 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { 820 return *isolate->factory()->NewStringFromStaticChars("ideo"); 821 } else { 822 return *isolate->factory()->NewStringFromStaticChars("unknown"); 823 } 824 } 825 826 namespace { 827 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate, 828 bool is_to_upper, const char* lang) { 829 auto case_converter = is_to_upper ? u_strToUpper : u_strToLower; 830 int32_t src_length = s->length(); 831 int32_t dest_length = src_length; 832 UErrorCode status; 833 Handle<SeqTwoByteString> result; 834 std::unique_ptr<uc16[]> sap; 835 836 if (dest_length == 0) return isolate->heap()->empty_string(); 837 838 // This is not a real loop. It'll be executed only once (no overflow) or 839 // twice (overflow). 840 for (int i = 0; i < 2; ++i) { 841 // Case conversion can increase the string length (e.g. sharp-S => SS) so 842 // that we have to handle RangeError exceptions here. 843 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 844 isolate, result, isolate->factory()->NewRawTwoByteString(dest_length)); 845 DisallowHeapAllocation no_gc; 846 DCHECK(s->IsFlat()); 847 String::FlatContent flat = s->GetFlatContent(); 848 const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length); 849 status = U_ZERO_ERROR; 850 dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()), 851 dest_length, src, src_length, lang, &status); 852 if (status != U_BUFFER_OVERFLOW_ERROR) break; 853 } 854 855 // In most cases, the output will fill the destination buffer completely 856 // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING). 857 // Only in rare cases, it'll be shorter than the destination buffer and 858 // |result| has to be truncated. 859 DCHECK(U_SUCCESS(status)); 860 if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) { 861 DCHECK(dest_length == result->length()); 862 return *result; 863 } 864 if (U_SUCCESS(status)) { 865 DCHECK(dest_length < result->length()); 866 return *Handle<SeqTwoByteString>::cast( 867 SeqString::Truncate(result, dest_length)); 868 } 869 return *s; 870 } 871 872 inline bool IsASCIIUpper(uint16_t ch) { return ch >= 'A' && ch <= 'Z'; } 873 874 const uint8_t kToLower[256] = { 875 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 876 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 877 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 878 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 879 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 880 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 881 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 882 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 883 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 884 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 885 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83, 886 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 887 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 888 0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 889 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 890 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, 891 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 892 0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7, 893 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 894 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 895 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 896 0xFC, 0xFD, 0xFE, 0xFF, 897 }; 898 899 inline uint16_t ToLatin1Lower(uint16_t ch) { 900 return static_cast<uint16_t>(kToLower[ch]); 901 } 902 903 inline uint16_t ToASCIIUpper(uint16_t ch) { 904 return ch & ~((ch >= 'a' && ch <= 'z') << 5); 905 } 906 907 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF. 908 inline uint16_t ToLatin1Upper(uint16_t ch) { 909 DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF); 910 return ch & 911 ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) 912 << 5); 913 } 914 915 template <typename Char> 916 bool ToUpperFastASCII(const Vector<const Char>& src, 917 Handle<SeqOneByteString> result) { 918 // Do a faster loop for the case where all the characters are ASCII. 919 uint16_t ored = 0; 920 int32_t index = 0; 921 for (auto it = src.begin(); it != src.end(); ++it) { 922 uint16_t ch = static_cast<uint16_t>(*it); 923 ored |= ch; 924 result->SeqOneByteStringSet(index++, ToASCIIUpper(ch)); 925 } 926 return !(ored & ~0x7F); 927 } 928 929 const uint16_t sharp_s = 0xDF; 930 931 template <typename Char> 932 bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest, 933 int* sharp_s_count) { 934 // Still pretty-fast path for the input with non-ASCII Latin-1 characters. 935 936 // There are two special cases. 937 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF. 938 // 2. Lower case sharp-S converts to "SS" (two characters) 939 *sharp_s_count = 0; 940 for (auto it = src.begin(); it != src.end(); ++it) { 941 uint16_t ch = static_cast<uint16_t>(*it); 942 if (V8_UNLIKELY(ch == sharp_s)) { 943 ++(*sharp_s_count); 944 continue; 945 } 946 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) { 947 // Since this upper-cased character does not fit in an 8-bit string, we 948 // need to take the 16-bit path. 949 return false; 950 } 951 *dest++ = ToLatin1Upper(ch); 952 } 953 954 return true; 955 } 956 957 template <typename Char> 958 void ToUpperWithSharpS(const Vector<const Char>& src, 959 Handle<SeqOneByteString> result) { 960 int32_t dest_index = 0; 961 for (auto it = src.begin(); it != src.end(); ++it) { 962 uint16_t ch = static_cast<uint16_t>(*it); 963 if (ch == sharp_s) { 964 result->SeqOneByteStringSet(dest_index++, 'S'); 965 result->SeqOneByteStringSet(dest_index++, 'S'); 966 } else { 967 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch)); 968 } 969 } 970 } 971 972 inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) { 973 for (int index = 0; index < length; ++index) { 974 uint16_t ch = s->Get(index); 975 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) { 976 return index; 977 } 978 } 979 return length; 980 } 981 982 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) { 983 if (!s->HasOnlyOneByteChars()) { 984 // Use a slower implementation for strings with characters beyond U+00FF. 985 return LocaleConvertCase(s, isolate, false, ""); 986 } 987 988 int length = s->length(); 989 990 // We depend here on the invariant that the length of a Latin1 991 // string is invariant under ToLowerCase, and the result always 992 // fits in the Latin1 range in the *root locale*. It does not hold 993 // for ToUpperCase even in the root locale. 994 995 // Scan the string for uppercase and non-ASCII characters for strings 996 // shorter than a machine-word without any memory allocation overhead. 997 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert() 998 // to two parts, one for scanning the prefix with no change and the other for 999 // handling ASCII-only characters. 1000 int index_to_first_unprocessed = length; 1001 const bool is_short = length < static_cast<int>(sizeof(uintptr_t)); 1002 if (is_short) { 1003 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); 1004 // Nothing to do if the string is all ASCII with no uppercase. 1005 if (index_to_first_unprocessed == length) return *s; 1006 } 1007 1008 Handle<SeqOneByteString> result = 1009 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); 1010 1011 DisallowHeapAllocation no_gc; 1012 DCHECK(s->IsFlat()); 1013 String::FlatContent flat = s->GetFlatContent(); 1014 uint8_t* dest = result->GetChars(); 1015 if (flat.IsOneByte()) { 1016 const uint8_t* src = flat.ToOneByteVector().start(); 1017 bool has_changed_character = false; 1018 index_to_first_unprocessed = FastAsciiConvert<true>( 1019 reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src), 1020 length, &has_changed_character); 1021 // If not ASCII, we keep the result up to index_to_first_unprocessed and 1022 // process the rest. 1023 if (index_to_first_unprocessed == length) 1024 return has_changed_character ? *result : *s; 1025 1026 for (int index = index_to_first_unprocessed; index < length; ++index) { 1027 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); 1028 } 1029 } else { 1030 if (index_to_first_unprocessed == length) { 1031 DCHECK(!is_short); 1032 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length); 1033 } 1034 // Nothing to do if the string is all ASCII with no uppercase. 1035 if (index_to_first_unprocessed == length) return *s; 1036 const uint16_t* src = flat.ToUC16Vector().start(); 1037 CopyChars(dest, src, index_to_first_unprocessed); 1038 for (int index = index_to_first_unprocessed; index < length; ++index) { 1039 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index])); 1040 } 1041 } 1042 1043 return *result; 1044 } 1045 1046 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) { 1047 int32_t length = s->length(); 1048 if (s->HasOnlyOneByteChars() && length > 0) { 1049 Handle<SeqOneByteString> result = 1050 isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); 1051 1052 DCHECK(s->IsFlat()); 1053 int sharp_s_count; 1054 bool is_result_single_byte; 1055 { 1056 DisallowHeapAllocation no_gc; 1057 String::FlatContent flat = s->GetFlatContent(); 1058 uint8_t* dest = result->GetChars(); 1059 if (flat.IsOneByte()) { 1060 Vector<const uint8_t> src = flat.ToOneByteVector(); 1061 bool has_changed_character = false; 1062 int index_to_first_unprocessed = 1063 FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()), 1064 reinterpret_cast<const char*>(src.start()), 1065 length, &has_changed_character); 1066 if (index_to_first_unprocessed == length) 1067 return has_changed_character ? *result : *s; 1068 // If not ASCII, we keep the result up to index_to_first_unprocessed and 1069 // process the rest. 1070 is_result_single_byte = 1071 ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length), 1072 dest + index_to_first_unprocessed, &sharp_s_count); 1073 } else { 1074 DCHECK(flat.IsTwoByte()); 1075 Vector<const uint16_t> src = flat.ToUC16Vector(); 1076 if (ToUpperFastASCII(src, result)) return *result; 1077 is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count); 1078 } 1079 } 1080 1081 // Go to the full Unicode path if there are characters whose uppercase 1082 // is beyond the Latin-1 range (cannot be represented in OneByteString). 1083 if (V8_UNLIKELY(!is_result_single_byte)) { 1084 return LocaleConvertCase(s, isolate, true, ""); 1085 } 1086 1087 if (sharp_s_count == 0) return *result; 1088 1089 // We have sharp_s_count sharp-s characters, but the result is still 1090 // in the Latin-1 range. 1091 ASSIGN_RETURN_FAILURE_ON_EXCEPTION( 1092 isolate, result, 1093 isolate->factory()->NewRawOneByteString(length + sharp_s_count)); 1094 DisallowHeapAllocation no_gc; 1095 String::FlatContent flat = s->GetFlatContent(); 1096 if (flat.IsOneByte()) { 1097 ToUpperWithSharpS(flat.ToOneByteVector(), result); 1098 } else { 1099 ToUpperWithSharpS(flat.ToUC16Vector(), result); 1100 } 1101 1102 return *result; 1103 } 1104 1105 return LocaleConvertCase(s, isolate, true, ""); 1106 } 1107 1108 MUST_USE_RESULT Object* ConvertCase(Handle<String> s, bool is_upper, 1109 Isolate* isolate) { 1110 return is_upper ? ConvertToUpper(s, isolate) : ConvertToLower(s, isolate); 1111 } 1112 1113 } // namespace 1114 1115 RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) { 1116 HandleScope scope(isolate); 1117 DCHECK_EQ(args.length(), 1); 1118 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); 1119 s = String::Flatten(s); 1120 return ConvertToLower(s, isolate); 1121 } 1122 1123 RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) { 1124 HandleScope scope(isolate); 1125 DCHECK_EQ(args.length(), 1); 1126 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); 1127 s = String::Flatten(s); 1128 return ConvertToUpper(s, isolate); 1129 } 1130 1131 RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) { 1132 HandleScope scope(isolate); 1133 DCHECK_EQ(args.length(), 3); 1134 CONVERT_ARG_HANDLE_CHECKED(String, s, 0); 1135 CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1); 1136 CONVERT_ARG_HANDLE_CHECKED(String, lang_arg, 2); 1137 1138 // Primary language tag can be up to 8 characters long in theory. 1139 // https://tools.ietf.org/html/bcp47#section-2.2.1 1140 DCHECK(lang_arg->length() <= 8); 1141 lang_arg = String::Flatten(lang_arg); 1142 s = String::Flatten(s); 1143 1144 // All the languages requiring special-handling have two-letter codes. 1145 if (V8_UNLIKELY(lang_arg->length() > 2)) 1146 return ConvertCase(s, is_upper, isolate); 1147 1148 char c1, c2; 1149 { 1150 DisallowHeapAllocation no_gc; 1151 String::FlatContent lang = lang_arg->GetFlatContent(); 1152 c1 = lang.Get(0); 1153 c2 = lang.Get(1); 1154 } 1155 // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath 1156 // in the root locale needs to be adjusted for az, lt and tr because even case 1157 // mapping of ASCII range characters are different in those locales. 1158 // Greek (el) does not require any adjustment. 1159 if (V8_UNLIKELY(c1 == 't' && c2 == 'r')) 1160 return LocaleConvertCase(s, isolate, is_upper, "tr"); 1161 if (V8_UNLIKELY(c1 == 'e' && c2 == 'l')) 1162 return LocaleConvertCase(s, isolate, is_upper, "el"); 1163 if (V8_UNLIKELY(c1 == 'l' && c2 == 't')) 1164 return LocaleConvertCase(s, isolate, is_upper, "lt"); 1165 if (V8_UNLIKELY(c1 == 'a' && c2 == 'z')) 1166 return LocaleConvertCase(s, isolate, is_upper, "az"); 1167 1168 return ConvertCase(s, is_upper, isolate); 1169 } 1170 1171 RUNTIME_FUNCTION(Runtime_DateCacheVersion) { 1172 HandleScope scope(isolate); 1173 DCHECK_EQ(0, args.length()); 1174 if (isolate->serializer_enabled()) return isolate->heap()->undefined_value(); 1175 if (!isolate->eternal_handles()->Exists(EternalHandles::DATE_CACHE_VERSION)) { 1176 Handle<FixedArray> date_cache_version = 1177 isolate->factory()->NewFixedArray(1, TENURED); 1178 date_cache_version->set(0, Smi::kZero); 1179 isolate->eternal_handles()->CreateSingleton( 1180 isolate, *date_cache_version, EternalHandles::DATE_CACHE_VERSION); 1181 } 1182 Handle<FixedArray> date_cache_version = 1183 Handle<FixedArray>::cast(isolate->eternal_handles()->GetSingleton( 1184 EternalHandles::DATE_CACHE_VERSION)); 1185 return date_cache_version->get(0); 1186 } 1187 1188 } // namespace internal 1189 } // namespace v8 1190 1191 #endif // V8_I18N_SUPPORT 1192