1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File brkiter.cpp 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 02/18/97 aliu Converted from OpenClass. Added DONE. 15 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 16 ***************************************************************************************** 17 */ 18 19 // ***************************************************************************** 20 // This file was generated from the java source file BreakIterator.java 21 // ***************************************************************************** 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_BREAK_ITERATION 26 27 #include "unicode/rbbi.h" 28 #include "unicode/brkiter.h" 29 #include "unicode/udata.h" 30 #include "unicode/ures.h" 31 #include "unicode/ustring.h" 32 #include "unicode/filteredbrk.h" 33 #include "ucln_cmn.h" 34 #include "cstring.h" 35 #include "umutex.h" 36 #include "servloc.h" 37 #include "locbased.h" 38 #include "uresimp.h" 39 #include "uassert.h" 40 #include "ubrkimpl.h" 41 #include "charstr.h" 42 43 // ***************************************************************************** 44 // class BreakIterator 45 // This class implements methods for finding the location of boundaries in text. 46 // Instances of BreakIterator maintain a current position and scan over text 47 // returning the index of characters where boundaries occur. 48 // ***************************************************************************** 49 50 U_NAMESPACE_BEGIN 51 52 // ------------------------------------- 53 54 BreakIterator* 55 BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) 56 { 57 char fnbuff[256]; 58 char ext[4]={'\0'}; 59 CharString actualLocale; 60 int32_t size; 61 const UChar* brkfname = NULL; 62 UResourceBundle brkRulesStack; 63 UResourceBundle brkNameStack; 64 UResourceBundle *brkRules = &brkRulesStack; 65 UResourceBundle *brkName = &brkNameStack; 66 RuleBasedBreakIterator *result = NULL; 67 68 if (U_FAILURE(status)) 69 return NULL; 70 71 ures_initStackObject(brkRules); 72 ures_initStackObject(brkName); 73 74 // Get the locale 75 UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); 76 77 // Get the "boundaries" array. 78 if (U_SUCCESS(status)) { 79 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 80 // Get the string object naming the rules file 81 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 82 // Get the actual string 83 brkfname = ures_getString(brkName, &size, &status); 84 U_ASSERT((size_t)size<sizeof(fnbuff)); 85 if ((size_t)size>=sizeof(fnbuff)) { 86 size=0; 87 if (U_SUCCESS(status)) { 88 status = U_BUFFER_OVERFLOW_ERROR; 89 } 90 } 91 92 // Use the string if we found it 93 if (U_SUCCESS(status) && brkfname) { 94 actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); 95 96 UChar* extStart=u_strchr(brkfname, 0x002e); 97 int len = 0; 98 if(extStart!=NULL){ 99 len = (int)(extStart-brkfname); 100 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 101 u_UCharsToChars(brkfname, fnbuff, len); 102 } 103 fnbuff[len]=0; // nul terminate 104 } 105 } 106 107 ures_close(brkRules); 108 ures_close(brkName); 109 110 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 111 if (U_FAILURE(status)) { 112 ures_close(b); 113 return NULL; 114 } 115 116 // Create a RuleBasedBreakIterator 117 result = new RuleBasedBreakIterator(file, status); 118 119 // If there is a result, set the valid locale and actual locale, and the kind 120 if (U_SUCCESS(status) && result != NULL) { 121 U_LOCALE_BASED(locBased, *(BreakIterator*)result); 122 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), 123 actualLocale.data()); 124 result->setBreakType(kind); 125 } 126 127 ures_close(b); 128 129 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 130 delete result; 131 return NULL; 132 } 133 134 if (result == NULL) { 135 udata_close(file); 136 if (U_SUCCESS(status)) { 137 status = U_MEMORY_ALLOCATION_ERROR; 138 } 139 } 140 141 return result; 142 } 143 144 // Creates a break iterator for word breaks. 145 BreakIterator* U_EXPORT2 146 BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 147 { 148 return createInstance(key, UBRK_WORD, status); 149 } 150 151 // ------------------------------------- 152 153 // Creates a break iterator for line breaks. 154 BreakIterator* U_EXPORT2 155 BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 156 { 157 return createInstance(key, UBRK_LINE, status); 158 } 159 160 // ------------------------------------- 161 162 // Creates a break iterator for character breaks. 163 BreakIterator* U_EXPORT2 164 BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 165 { 166 return createInstance(key, UBRK_CHARACTER, status); 167 } 168 169 // ------------------------------------- 170 171 // Creates a break iterator for sentence breaks. 172 BreakIterator* U_EXPORT2 173 BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 174 { 175 return createInstance(key, UBRK_SENTENCE, status); 176 } 177 178 // ------------------------------------- 179 180 // Creates a break iterator for title casing breaks. 181 BreakIterator* U_EXPORT2 182 BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 183 { 184 return createInstance(key, UBRK_TITLE, status); 185 } 186 187 // ------------------------------------- 188 189 // Gets all the available locales that has localized text boundary data. 190 const Locale* U_EXPORT2 191 BreakIterator::getAvailableLocales(int32_t& count) 192 { 193 return Locale::getAvailableLocales(count); 194 } 195 196 // ------------------------------------------ 197 // 198 // Constructors, destructor and assignment operator 199 // 200 //------------------------------------------- 201 202 BreakIterator::BreakIterator() 203 { 204 *validLocale = *actualLocale = 0; 205 } 206 207 BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { 208 uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); 209 uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); 210 } 211 212 BreakIterator &BreakIterator::operator =(const BreakIterator &other) { 213 if (this != &other) { 214 uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); 215 uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); 216 } 217 return *this; 218 } 219 220 BreakIterator::~BreakIterator() 221 { 222 } 223 224 // ------------------------------------------ 225 // 226 // Registration 227 // 228 //------------------------------------------- 229 #if !UCONFIG_NO_SERVICE 230 231 // ------------------------------------- 232 233 class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 234 public: 235 virtual ~ICUBreakIteratorFactory(); 236 protected: 237 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 238 return BreakIterator::makeInstance(loc, kind, status); 239 } 240 }; 241 242 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 243 244 // ------------------------------------- 245 246 class ICUBreakIteratorService : public ICULocaleService { 247 public: 248 ICUBreakIteratorService() 249 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 250 { 251 UErrorCode status = U_ZERO_ERROR; 252 registerFactory(new ICUBreakIteratorFactory(), status); 253 } 254 255 virtual ~ICUBreakIteratorService(); 256 257 virtual UObject* cloneInstance(UObject* instance) const { 258 return ((BreakIterator*)instance)->clone(); 259 } 260 261 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 262 LocaleKey& lkey = (LocaleKey&)key; 263 int32_t kind = lkey.kind(); 264 Locale loc; 265 lkey.currentLocale(loc); 266 return BreakIterator::makeInstance(loc, kind, status); 267 } 268 269 virtual UBool isDefault() const { 270 return countFactories() == 1; 271 } 272 }; 273 274 ICUBreakIteratorService::~ICUBreakIteratorService() {} 275 276 // ------------------------------------- 277 278 // defined in ucln_cmn.h 279 U_NAMESPACE_END 280 281 static icu::UInitOnce gInitOnceBrkiter; 282 static icu::ICULocaleService* gService = NULL; 283 284 285 286 /** 287 * Release all static memory held by breakiterator. 288 */ 289 U_CDECL_BEGIN 290 static UBool U_CALLCONV breakiterator_cleanup(void) { 291 #if !UCONFIG_NO_SERVICE 292 if (gService) { 293 delete gService; 294 gService = NULL; 295 } 296 gInitOnceBrkiter.reset(); 297 #endif 298 return TRUE; 299 } 300 U_CDECL_END 301 U_NAMESPACE_BEGIN 302 303 static void U_CALLCONV 304 initService(void) { 305 gService = new ICUBreakIteratorService(); 306 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 307 } 308 309 static ICULocaleService* 310 getService(void) 311 { 312 umtx_initOnce(gInitOnceBrkiter, &initService); 313 return gService; 314 } 315 316 317 // ------------------------------------- 318 319 static inline UBool 320 hasService(void) 321 { 322 return !gInitOnceBrkiter.isReset() && getService() != NULL; 323 } 324 325 // ------------------------------------- 326 327 URegistryKey U_EXPORT2 328 BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 329 { 330 ICULocaleService *service = getService(); 331 if (service == NULL) { 332 status = U_MEMORY_ALLOCATION_ERROR; 333 return NULL; 334 } 335 return service->registerInstance(toAdopt, locale, kind, status); 336 } 337 338 // ------------------------------------- 339 340 UBool U_EXPORT2 341 BreakIterator::unregister(URegistryKey key, UErrorCode& status) 342 { 343 if (U_SUCCESS(status)) { 344 if (hasService()) { 345 return gService->unregister(key, status); 346 } 347 status = U_MEMORY_ALLOCATION_ERROR; 348 } 349 return FALSE; 350 } 351 352 // ------------------------------------- 353 354 StringEnumeration* U_EXPORT2 355 BreakIterator::getAvailableLocales(void) 356 { 357 ICULocaleService *service = getService(); 358 if (service == NULL) { 359 return NULL; 360 } 361 return service->getAvailableLocales(); 362 } 363 #endif /* UCONFIG_NO_SERVICE */ 364 365 // ------------------------------------- 366 367 BreakIterator* 368 BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 369 { 370 if (U_FAILURE(status)) { 371 return NULL; 372 } 373 374 #if !UCONFIG_NO_SERVICE 375 if (hasService()) { 376 Locale actualLoc(""); 377 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 378 // TODO: The way the service code works in ICU 2.8 is that if 379 // there is a real registered break iterator, the actualLoc 380 // will be populated, but if the handleDefault path is taken 381 // (because nothing is registered that can handle the 382 // requested locale) then the actualLoc comes back empty. In 383 // that case, the returned object already has its actual/valid 384 // locale data populated (by makeInstance, which is what 385 // handleDefault calls), so we don't touch it. YES, A COMMENT 386 // THIS LONG is a sign of bad code -- so the action item is to 387 // revisit this in ICU 3.0 and clean it up/fix it/remove it. 388 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 389 U_LOCALE_BASED(locBased, *result); 390 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 391 } 392 return result; 393 } 394 else 395 #endif 396 { 397 return makeInstance(loc, kind, status); 398 } 399 } 400 401 // ------------------------------------- 402 enum { kKeyValueLenMax = 32 }; 403 404 BreakIterator* 405 BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 406 { 407 408 if (U_FAILURE(status)) { 409 return NULL; 410 } 411 char lbType[kKeyValueLenMax]; 412 413 BreakIterator *result = NULL; 414 switch (kind) { 415 case UBRK_CHARACTER: 416 result = BreakIterator::buildInstance(loc, "grapheme", kind, status); 417 break; 418 case UBRK_WORD: 419 result = BreakIterator::buildInstance(loc, "word", kind, status); 420 break; 421 case UBRK_LINE: 422 uprv_strcpy(lbType, "line"); 423 { 424 char lbKeyValue[kKeyValueLenMax] = {0}; 425 UErrorCode kvStatus = U_ZERO_ERROR; 426 int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); 427 if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { 428 uprv_strcat(lbType, "_"); 429 uprv_strcat(lbType, lbKeyValue); 430 } 431 } 432 result = BreakIterator::buildInstance(loc, lbType, kind, status); 433 break; 434 case UBRK_SENTENCE: 435 result = BreakIterator::buildInstance(loc, "sentence", kind, status); 436 #if !UCONFIG_NO_FILTERED_BREAK_ITERATION 437 { 438 char ssKeyValue[kKeyValueLenMax] = {0}; 439 UErrorCode kvStatus = U_ZERO_ERROR; 440 int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); 441 if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { 442 FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); 443 if (U_SUCCESS(kvStatus)) { 444 result = fbiBuilder->build(result, status); 445 delete fbiBuilder; 446 } 447 } 448 } 449 #endif 450 break; 451 case UBRK_TITLE: 452 result = BreakIterator::buildInstance(loc, "title", kind, status); 453 break; 454 default: 455 status = U_ILLEGAL_ARGUMENT_ERROR; 456 } 457 458 if (U_FAILURE(status)) { 459 return NULL; 460 } 461 462 return result; 463 } 464 465 Locale 466 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 467 U_LOCALE_BASED(locBased, *this); 468 return locBased.getLocale(type, status); 469 } 470 471 const char * 472 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 473 U_LOCALE_BASED(locBased, *this); 474 return locBased.getLocaleID(type, status); 475 } 476 477 478 // This implementation of getRuleStatus is a do-nothing stub, here to 479 // provide a default implementation for any derived BreakIterator classes that 480 // do not implement it themselves. 481 int32_t BreakIterator::getRuleStatus() const { 482 return 0; 483 } 484 485 // This implementation of getRuleStatusVec is a do-nothing stub, here to 486 // provide a default implementation for any derived BreakIterator classes that 487 // do not implement it themselves. 488 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { 489 if (U_FAILURE(status)) { 490 return 0; 491 } 492 if (capacity < 1) { 493 status = U_BUFFER_OVERFLOW_ERROR; 494 return 1; 495 } 496 *fillInVec = 0; 497 return 1; 498 } 499 500 BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { 501 U_LOCALE_BASED(locBased, (*this)); 502 locBased.setLocaleIDs(valid, actual); 503 } 504 505 U_NAMESPACE_END 506 507 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 508 509 //eof 510