Home | History | Annotate | Download | only in i18n
      1 // Copyright 2013 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 #include "break-iterator.h"
     29 
     30 #include <string.h>
     31 
     32 #include "i18n-utils.h"
     33 #include "unicode/brkiter.h"
     34 #include "unicode/locid.h"
     35 #include "unicode/rbbi.h"
     36 
     37 namespace v8_i18n {
     38 
// Forward declarations of the file-local helpers defined later in this file.

// Schedules the "not a BreakIterator" JavaScript error.
static v8::Handle<v8::Value> ThrowUnexpectedObjectError();
// Replaces the text kept in internal field 1 of the given object with a
// newly allocated copy of the given value, and returns the new string.
static icu::UnicodeString* ResetAdoptedText(v8::Handle<v8::Object>,
                                            v8::Handle<v8::Value>);
// Creates the ICU break iterator for (locale, options, resolved); returns
// NULL on failure.
static icu::BreakIterator* InitializeBreakIterator(v8::Handle<v8::String>,
                                                   v8::Handle<v8::Object>,
                                                   v8::Handle<v8::Object>);
// Creates the ICU break iterator selected by the "type" option for a locale;
// returns NULL on failure.
static icu::BreakIterator* CreateICUBreakIterator(const icu::Locale&,
                                                  v8::Handle<v8::Object>);
// Writes the resolved "locale" property into the given object.
static void SetResolvedSettings(const icu::Locale&,
                                icu::BreakIterator*,
                                v8::Handle<v8::Object>);
     50 
     51 icu::BreakIterator* BreakIterator::UnpackBreakIterator(
     52     v8::Handle<v8::Object> obj) {
     53   v8::HandleScope handle_scope;
     54 
     55   // v8::ObjectTemplate doesn't have HasInstance method so we can't check
     56   // if obj is an instance of BreakIterator class. We'll check for a property
     57   // that has to be in the object. The same applies to other services, like
     58   // Collator and DateTimeFormat.
     59   if (obj->HasOwnProperty(v8::String::New("breakIterator"))) {
     60     return static_cast<icu::BreakIterator*>(
     61         obj->GetAlignedPointerFromInternalField(0));
     62   }
     63 
     64   return NULL;
     65 }
     66 
     67 void BreakIterator::DeleteBreakIterator(v8::Isolate* isolate,
     68                                         v8::Persistent<v8::Object>* object,
     69                                         void* param) {
     70   // First delete the hidden C++ object.
     71   // Unpacking should never return NULL here. That would only happen if
     72   // this method is used as the weak callback for persistent handles not
     73   // pointing to a break iterator.
     74   v8::HandleScope handle_scope(isolate);
     75   v8::Local<v8::Object> handle = v8::Local<v8::Object>::New(isolate, *object);
     76   delete UnpackBreakIterator(handle);
     77 
     78   delete static_cast<icu::UnicodeString*>(
     79       handle->GetAlignedPointerFromInternalField(1));
     80 
     81   // Then dispose of the persistent handle to JS object.
     82   object->Dispose(isolate);
     83 }
     84 
     85 
     86 // Throws a JavaScript exception.
     87 static v8::Handle<v8::Value> ThrowUnexpectedObjectError() {
     88   // Returns undefined, and schedules an exception to be thrown.
     89   return v8::ThrowException(v8::Exception::Error(
     90       v8::String::New("BreakIterator method called on an object "
     91                       "that is not a BreakIterator.")));
     92 }
     93 
     94 
     95 // Deletes the old value and sets the adopted text in corresponding
     96 // JavaScript object.
     97 icu::UnicodeString* ResetAdoptedText(
     98     v8::Handle<v8::Object> obj, v8::Handle<v8::Value> value) {
     99   // Get the previous value from the internal field.
    100   icu::UnicodeString* text = static_cast<icu::UnicodeString*>(
    101       obj->GetAlignedPointerFromInternalField(1));
    102   delete text;
    103 
    104   // Assign new value to the internal pointer.
    105   v8::String::Value text_value(value);
    106   text = new icu::UnicodeString(
    107       reinterpret_cast<const UChar*>(*text_value), text_value.length());
    108   obj->SetAlignedPointerInInternalField(1, text);
    109 
    110   // Return new unicode string pointer.
    111   return text;
    112 }
    113 
    114 void BreakIterator::JSInternalBreakIteratorAdoptText(
    115     const v8::FunctionCallbackInfo<v8::Value>& args) {
    116   if (args.Length() != 2 || !args[0]->IsObject() || !args[1]->IsString()) {
    117     v8::ThrowException(v8::Exception::Error(
    118         v8::String::New(
    119             "Internal error. Iterator and text have to be specified.")));
    120     return;
    121   }
    122 
    123   icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject());
    124   if (!break_iterator) {
    125     ThrowUnexpectedObjectError();
    126     return;
    127   }
    128 
    129   break_iterator->setText(*ResetAdoptedText(args[0]->ToObject(), args[1]));
    130 }
    131 
    132 void BreakIterator::JSInternalBreakIteratorFirst(
    133     const v8::FunctionCallbackInfo<v8::Value>& args) {
    134   icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject());
    135   if (!break_iterator) {
    136     ThrowUnexpectedObjectError();
    137     return;
    138   }
    139 
    140   args.GetReturnValue().Set(static_cast<int32_t>(break_iterator->first()));
    141 }
    142 
    143 void BreakIterator::JSInternalBreakIteratorNext(
    144     const v8::FunctionCallbackInfo<v8::Value>& args) {
    145   icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject());
    146   if (!break_iterator) {
    147     ThrowUnexpectedObjectError();
    148     return;
    149   }
    150 
    151   args.GetReturnValue().Set(static_cast<int32_t>(break_iterator->next()));
    152 }
    153 
    154 void BreakIterator::JSInternalBreakIteratorCurrent(
    155     const v8::FunctionCallbackInfo<v8::Value>& args) {
    156   icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject());
    157   if (!break_iterator) {
    158     ThrowUnexpectedObjectError();
    159     return;
    160   }
    161 
    162   args.GetReturnValue().Set(static_cast<int32_t>(break_iterator->current()));
    163 }
    164 
    165 void BreakIterator::JSInternalBreakIteratorBreakType(
    166     const v8::FunctionCallbackInfo<v8::Value>& args) {
    167   icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject());
    168   if (!break_iterator) {
    169     ThrowUnexpectedObjectError();
    170     return;
    171   }
    172 
    173   // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
    174   icu::RuleBasedBreakIterator* rule_based_iterator =
    175       static_cast<icu::RuleBasedBreakIterator*>(break_iterator);
    176   int32_t status = rule_based_iterator->getRuleStatus();
    177   // Keep return values in sync with JavaScript BreakType enum.
    178   v8::Handle<v8::String> result;
    179   if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) {
    180     result = v8::String::New("none");
    181   } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) {
    182     result = v8::String::New("number");
    183   } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) {
    184     result = v8::String::New("letter");
    185   } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) {
    186     result = v8::String::New("kana");
    187   } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) {
    188     result = v8::String::New("ideo");
    189   } else {
    190     result = v8::String::New("unknown");
    191   }
    192   args.GetReturnValue().Set(result);
    193 }
    194 
    195 void BreakIterator::JSCreateBreakIterator(
    196     const v8::FunctionCallbackInfo<v8::Value>& args) {
    197   if (args.Length() != 3 || !args[0]->IsString() || !args[1]->IsObject() ||
    198       !args[2]->IsObject()) {
    199     v8::ThrowException(v8::Exception::Error(
    200         v8::String::New("Internal error, wrong parameters.")));
    201     return;
    202   }
    203 
    204   v8::Isolate* isolate = args.GetIsolate();
    205   v8::Local<v8::ObjectTemplate> break_iterator_template =
    206       Utils::GetTemplate2(isolate);
    207 
    208   // Create an empty object wrapper.
    209   v8::Local<v8::Object> local_object = break_iterator_template->NewInstance();
    210   // But the handle shouldn't be empty.
    211   // That can happen if there was a stack overflow when creating the object.
    212   if (local_object.IsEmpty()) {
    213     args.GetReturnValue().Set(local_object);
    214     return;
    215   }
    216 
    217   // Set break iterator as internal field of the resulting JS object.
    218   icu::BreakIterator* break_iterator = InitializeBreakIterator(
    219       args[0]->ToString(), args[1]->ToObject(), args[2]->ToObject());
    220 
    221   if (!break_iterator) {
    222     v8::ThrowException(v8::Exception::Error(v8::String::New(
    223         "Internal error. Couldn't create ICU break iterator.")));
    224     return;
    225   } else {
    226     local_object->SetAlignedPointerInInternalField(0, break_iterator);
    227     // Make sure that the pointer to adopted text is NULL.
    228     local_object->SetAlignedPointerInInternalField(1, NULL);
    229 
    230     v8::TryCatch try_catch;
    231     local_object->Set(v8::String::New("breakIterator"),
    232                       v8::String::New("valid"));
    233     if (try_catch.HasCaught()) {
    234       v8::ThrowException(v8::Exception::Error(
    235           v8::String::New("Internal error, couldn't set property.")));
    236       return;
    237     }
    238   }
    239 
    240   v8::Persistent<v8::Object> wrapper(isolate, local_object);
    241   // Make object handle weak so we can delete iterator once GC kicks in.
    242   wrapper.MakeWeak<void>(NULL, &DeleteBreakIterator);
    243   args.GetReturnValue().Set(wrapper);
    244   wrapper.ClearAndLeak();
    245 }
    246 
    247 static icu::BreakIterator* InitializeBreakIterator(
    248     v8::Handle<v8::String> locale,
    249     v8::Handle<v8::Object> options,
    250     v8::Handle<v8::Object> resolved) {
    251   // Convert BCP47 into ICU locale format.
    252   UErrorCode status = U_ZERO_ERROR;
    253   icu::Locale icu_locale;
    254   char icu_result[ULOC_FULLNAME_CAPACITY];
    255   int icu_length = 0;
    256   v8::String::AsciiValue bcp47_locale(locale);
    257   if (bcp47_locale.length() != 0) {
    258     uloc_forLanguageTag(*bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY,
    259                         &icu_length, &status);
    260     if (U_FAILURE(status) || icu_length == 0) {
    261       return NULL;
    262     }
    263     icu_locale = icu::Locale(icu_result);
    264   }
    265 
    266   icu::BreakIterator* break_iterator =
    267     CreateICUBreakIterator(icu_locale, options);
    268   if (!break_iterator) {
    269     // Remove extensions and try again.
    270     icu::Locale no_extension_locale(icu_locale.getBaseName());
    271     break_iterator = CreateICUBreakIterator(no_extension_locale, options);
    272 
    273     // Set resolved settings (locale).
    274     SetResolvedSettings(no_extension_locale, break_iterator, resolved);
    275   } else {
    276     SetResolvedSettings(icu_locale, break_iterator, resolved);
    277   }
    278 
    279   return break_iterator;
    280 }
    281 
    282 static icu::BreakIterator* CreateICUBreakIterator(
    283     const icu::Locale& icu_locale, v8::Handle<v8::Object> options) {
    284   UErrorCode status = U_ZERO_ERROR;
    285   icu::BreakIterator* break_iterator = NULL;
    286   icu::UnicodeString type;
    287   if (!Utils::ExtractStringSetting(options, "type", &type)) {
    288     // Type had to be in the options. This would be an internal error.
    289     return NULL;
    290   }
    291 
    292   if (type == UNICODE_STRING_SIMPLE("character")) {
    293     break_iterator =
    294       icu::BreakIterator::createCharacterInstance(icu_locale, status);
    295   } else if (type == UNICODE_STRING_SIMPLE("sentence")) {
    296     break_iterator =
    297       icu::BreakIterator::createSentenceInstance(icu_locale, status);
    298   } else if (type == UNICODE_STRING_SIMPLE("line")) {
    299     break_iterator =
    300       icu::BreakIterator::createLineInstance(icu_locale, status);
    301   } else {
    302     // Defualt is word iterator.
    303     break_iterator =
    304       icu::BreakIterator::createWordInstance(icu_locale, status);
    305   }
    306 
    307   if (U_FAILURE(status)) {
    308     delete break_iterator;
    309     return NULL;
    310   }
    311 
    312   return break_iterator;
    313 }
    314 
    315 static void SetResolvedSettings(const icu::Locale& icu_locale,
    316                                 icu::BreakIterator* date_format,
    317                                 v8::Handle<v8::Object> resolved) {
    318   UErrorCode status = U_ZERO_ERROR;
    319 
    320   // Set the locale
    321   char result[ULOC_FULLNAME_CAPACITY];
    322   status = U_ZERO_ERROR;
    323   uloc_toLanguageTag(
    324       icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY, FALSE, &status);
    325   if (U_SUCCESS(status)) {
    326     resolved->Set(v8::String::New("locale"), v8::String::New(result));
    327   } else {
    328     // This would never happen, since we got the locale from ICU.
    329     resolved->Set(v8::String::New("locale"), v8::String::New("und"));
    330   }
    331 }
    332 
    333 }  // namespace v8_i18n
    334