// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 28 #include "break-iterator.h" 29 30 #include <string.h> 31 32 #include "i18n-utils.h" 33 #include "unicode/brkiter.h" 34 #include "unicode/locid.h" 35 #include "unicode/rbbi.h" 36 37 namespace v8_i18n { 38 39 static v8::Handle<v8::Value> ThrowUnexpectedObjectError(); 40 static icu::UnicodeString* ResetAdoptedText(v8::Handle<v8::Object>, 41 v8::Handle<v8::Value>); 42 static icu::BreakIterator* InitializeBreakIterator(v8::Handle<v8::String>, 43 v8::Handle<v8::Object>, 44 v8::Handle<v8::Object>); 45 static icu::BreakIterator* CreateICUBreakIterator(const icu::Locale&, 46 v8::Handle<v8::Object>); 47 static void SetResolvedSettings(const icu::Locale&, 48 icu::BreakIterator*, 49 v8::Handle<v8::Object>); 50 51 icu::BreakIterator* BreakIterator::UnpackBreakIterator( 52 v8::Handle<v8::Object> obj) { 53 v8::HandleScope handle_scope; 54 55 // v8::ObjectTemplate doesn't have HasInstance method so we can't check 56 // if obj is an instance of BreakIterator class. We'll check for a property 57 // that has to be in the object. The same applies to other services, like 58 // Collator and DateTimeFormat. 59 if (obj->HasOwnProperty(v8::String::New("breakIterator"))) { 60 return static_cast<icu::BreakIterator*>( 61 obj->GetAlignedPointerFromInternalField(0)); 62 } 63 64 return NULL; 65 } 66 67 void BreakIterator::DeleteBreakIterator(v8::Isolate* isolate, 68 v8::Persistent<v8::Object>* object, 69 void* param) { 70 // First delete the hidden C++ object. 71 // Unpacking should never return NULL here. That would only happen if 72 // this method is used as the weak callback for persistent handles not 73 // pointing to a break iterator. 74 v8::HandleScope handle_scope(isolate); 75 v8::Local<v8::Object> handle = v8::Local<v8::Object>::New(isolate, *object); 76 delete UnpackBreakIterator(handle); 77 78 delete static_cast<icu::UnicodeString*>( 79 handle->GetAlignedPointerFromInternalField(1)); 80 81 // Then dispose of the persistent handle to JS object. 
82 object->Dispose(isolate); 83 } 84 85 86 // Throws a JavaScript exception. 87 static v8::Handle<v8::Value> ThrowUnexpectedObjectError() { 88 // Returns undefined, and schedules an exception to be thrown. 89 return v8::ThrowException(v8::Exception::Error( 90 v8::String::New("BreakIterator method called on an object " 91 "that is not a BreakIterator."))); 92 } 93 94 95 // Deletes the old value and sets the adopted text in corresponding 96 // JavaScript object. 97 icu::UnicodeString* ResetAdoptedText( 98 v8::Handle<v8::Object> obj, v8::Handle<v8::Value> value) { 99 // Get the previous value from the internal field. 100 icu::UnicodeString* text = static_cast<icu::UnicodeString*>( 101 obj->GetAlignedPointerFromInternalField(1)); 102 delete text; 103 104 // Assign new value to the internal pointer. 105 v8::String::Value text_value(value); 106 text = new icu::UnicodeString( 107 reinterpret_cast<const UChar*>(*text_value), text_value.length()); 108 obj->SetAlignedPointerInInternalField(1, text); 109 110 // Return new unicode string pointer. 111 return text; 112 } 113 114 void BreakIterator::JSInternalBreakIteratorAdoptText( 115 const v8::FunctionCallbackInfo<v8::Value>& args) { 116 if (args.Length() != 2 || !args[0]->IsObject() || !args[1]->IsString()) { 117 v8::ThrowException(v8::Exception::Error( 118 v8::String::New( 119 "Internal error. 
Iterator and text have to be specified."))); 120 return; 121 } 122 123 icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject()); 124 if (!break_iterator) { 125 ThrowUnexpectedObjectError(); 126 return; 127 } 128 129 break_iterator->setText(*ResetAdoptedText(args[0]->ToObject(), args[1])); 130 } 131 132 void BreakIterator::JSInternalBreakIteratorFirst( 133 const v8::FunctionCallbackInfo<v8::Value>& args) { 134 icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject()); 135 if (!break_iterator) { 136 ThrowUnexpectedObjectError(); 137 return; 138 } 139 140 args.GetReturnValue().Set(static_cast<int32_t>(break_iterator->first())); 141 } 142 143 void BreakIterator::JSInternalBreakIteratorNext( 144 const v8::FunctionCallbackInfo<v8::Value>& args) { 145 icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject()); 146 if (!break_iterator) { 147 ThrowUnexpectedObjectError(); 148 return; 149 } 150 151 args.GetReturnValue().Set(static_cast<int32_t>(break_iterator->next())); 152 } 153 154 void BreakIterator::JSInternalBreakIteratorCurrent( 155 const v8::FunctionCallbackInfo<v8::Value>& args) { 156 icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject()); 157 if (!break_iterator) { 158 ThrowUnexpectedObjectError(); 159 return; 160 } 161 162 args.GetReturnValue().Set(static_cast<int32_t>(break_iterator->current())); 163 } 164 165 void BreakIterator::JSInternalBreakIteratorBreakType( 166 const v8::FunctionCallbackInfo<v8::Value>& args) { 167 icu::BreakIterator* break_iterator = UnpackBreakIterator(args[0]->ToObject()); 168 if (!break_iterator) { 169 ThrowUnexpectedObjectError(); 170 return; 171 } 172 173 // TODO(cira): Remove cast once ICU fixes base BreakIterator class. 
174 icu::RuleBasedBreakIterator* rule_based_iterator = 175 static_cast<icu::RuleBasedBreakIterator*>(break_iterator); 176 int32_t status = rule_based_iterator->getRuleStatus(); 177 // Keep return values in sync with JavaScript BreakType enum. 178 v8::Handle<v8::String> result; 179 if (status >= UBRK_WORD_NONE && status < UBRK_WORD_NONE_LIMIT) { 180 result = v8::String::New("none"); 181 } else if (status >= UBRK_WORD_NUMBER && status < UBRK_WORD_NUMBER_LIMIT) { 182 result = v8::String::New("number"); 183 } else if (status >= UBRK_WORD_LETTER && status < UBRK_WORD_LETTER_LIMIT) { 184 result = v8::String::New("letter"); 185 } else if (status >= UBRK_WORD_KANA && status < UBRK_WORD_KANA_LIMIT) { 186 result = v8::String::New("kana"); 187 } else if (status >= UBRK_WORD_IDEO && status < UBRK_WORD_IDEO_LIMIT) { 188 result = v8::String::New("ideo"); 189 } else { 190 result = v8::String::New("unknown"); 191 } 192 args.GetReturnValue().Set(result); 193 } 194 195 void BreakIterator::JSCreateBreakIterator( 196 const v8::FunctionCallbackInfo<v8::Value>& args) { 197 if (args.Length() != 3 || !args[0]->IsString() || !args[1]->IsObject() || 198 !args[2]->IsObject()) { 199 v8::ThrowException(v8::Exception::Error( 200 v8::String::New("Internal error, wrong parameters."))); 201 return; 202 } 203 204 v8::Isolate* isolate = args.GetIsolate(); 205 v8::Local<v8::ObjectTemplate> break_iterator_template = 206 Utils::GetTemplate2(isolate); 207 208 // Create an empty object wrapper. 209 v8::Local<v8::Object> local_object = break_iterator_template->NewInstance(); 210 // But the handle shouldn't be empty. 211 // That can happen if there was a stack overflow when creating the object. 212 if (local_object.IsEmpty()) { 213 args.GetReturnValue().Set(local_object); 214 return; 215 } 216 217 // Set break iterator as internal field of the resulting JS object. 
218 icu::BreakIterator* break_iterator = InitializeBreakIterator( 219 args[0]->ToString(), args[1]->ToObject(), args[2]->ToObject()); 220 221 if (!break_iterator) { 222 v8::ThrowException(v8::Exception::Error(v8::String::New( 223 "Internal error. Couldn't create ICU break iterator."))); 224 return; 225 } else { 226 local_object->SetAlignedPointerInInternalField(0, break_iterator); 227 // Make sure that the pointer to adopted text is NULL. 228 local_object->SetAlignedPointerInInternalField(1, NULL); 229 230 v8::TryCatch try_catch; 231 local_object->Set(v8::String::New("breakIterator"), 232 v8::String::New("valid")); 233 if (try_catch.HasCaught()) { 234 v8::ThrowException(v8::Exception::Error( 235 v8::String::New("Internal error, couldn't set property."))); 236 return; 237 } 238 } 239 240 v8::Persistent<v8::Object> wrapper(isolate, local_object); 241 // Make object handle weak so we can delete iterator once GC kicks in. 242 wrapper.MakeWeak<void>(NULL, &DeleteBreakIterator); 243 args.GetReturnValue().Set(wrapper); 244 wrapper.ClearAndLeak(); 245 } 246 247 static icu::BreakIterator* InitializeBreakIterator( 248 v8::Handle<v8::String> locale, 249 v8::Handle<v8::Object> options, 250 v8::Handle<v8::Object> resolved) { 251 // Convert BCP47 into ICU locale format. 252 UErrorCode status = U_ZERO_ERROR; 253 icu::Locale icu_locale; 254 char icu_result[ULOC_FULLNAME_CAPACITY]; 255 int icu_length = 0; 256 v8::String::AsciiValue bcp47_locale(locale); 257 if (bcp47_locale.length() != 0) { 258 uloc_forLanguageTag(*bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY, 259 &icu_length, &status); 260 if (U_FAILURE(status) || icu_length == 0) { 261 return NULL; 262 } 263 icu_locale = icu::Locale(icu_result); 264 } 265 266 icu::BreakIterator* break_iterator = 267 CreateICUBreakIterator(icu_locale, options); 268 if (!break_iterator) { 269 // Remove extensions and try again. 
270 icu::Locale no_extension_locale(icu_locale.getBaseName()); 271 break_iterator = CreateICUBreakIterator(no_extension_locale, options); 272 273 // Set resolved settings (locale). 274 SetResolvedSettings(no_extension_locale, break_iterator, resolved); 275 } else { 276 SetResolvedSettings(icu_locale, break_iterator, resolved); 277 } 278 279 return break_iterator; 280 } 281 282 static icu::BreakIterator* CreateICUBreakIterator( 283 const icu::Locale& icu_locale, v8::Handle<v8::Object> options) { 284 UErrorCode status = U_ZERO_ERROR; 285 icu::BreakIterator* break_iterator = NULL; 286 icu::UnicodeString type; 287 if (!Utils::ExtractStringSetting(options, "type", &type)) { 288 // Type had to be in the options. This would be an internal error. 289 return NULL; 290 } 291 292 if (type == UNICODE_STRING_SIMPLE("character")) { 293 break_iterator = 294 icu::BreakIterator::createCharacterInstance(icu_locale, status); 295 } else if (type == UNICODE_STRING_SIMPLE("sentence")) { 296 break_iterator = 297 icu::BreakIterator::createSentenceInstance(icu_locale, status); 298 } else if (type == UNICODE_STRING_SIMPLE("line")) { 299 break_iterator = 300 icu::BreakIterator::createLineInstance(icu_locale, status); 301 } else { 302 // Defualt is word iterator. 
303 break_iterator = 304 icu::BreakIterator::createWordInstance(icu_locale, status); 305 } 306 307 if (U_FAILURE(status)) { 308 delete break_iterator; 309 return NULL; 310 } 311 312 return break_iterator; 313 } 314 315 static void SetResolvedSettings(const icu::Locale& icu_locale, 316 icu::BreakIterator* date_format, 317 v8::Handle<v8::Object> resolved) { 318 UErrorCode status = U_ZERO_ERROR; 319 320 // Set the locale 321 char result[ULOC_FULLNAME_CAPACITY]; 322 status = U_ZERO_ERROR; 323 uloc_toLanguageTag( 324 icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY, FALSE, &status); 325 if (U_SUCCESS(status)) { 326 resolved->Set(v8::String::New("locale"), v8::String::New(result)); 327 } else { 328 // This would never happen, since we got the locale from ICU. 329 resolved->Set(v8::String::New("locale"), v8::String::New("und")); 330 } 331 } 332 333 } // namespace v8_i18n 334