1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File brkiter.cpp 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 02/18/97 aliu Converted from OpenClass. Added DONE. 15 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 16 ***************************************************************************************** 17 */ 18 19 // ***************************************************************************** 20 // This file was generated from the java source file BreakIterator.java 21 // ***************************************************************************** 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_BREAK_ITERATION 26 27 #include "unicode/rbbi.h" 28 #include "unicode/brkiter.h" 29 #include "unicode/udata.h" 30 #include "unicode/ures.h" 31 #include "unicode/ustring.h" 32 #include "unicode/filteredbrk.h" 33 #include "ucln_cmn.h" 34 #include "cstring.h" 35 #include "umutex.h" 36 #include "servloc.h" 37 #include "locbased.h" 38 #include "uresimp.h" 39 #include "uassert.h" 40 #include "ubrkimpl.h" 41 #include "charstr.h" 42 43 // ***************************************************************************** 44 // class BreakIterator 45 // This class implements methods for finding the location of boundaries in text. 46 // Instances of BreakIterator maintain a current position and scan over text 47 // returning the index of characters where boundaries occur. 48 // ***************************************************************************** 49 50 U_NAMESPACE_BEGIN 51 52 // ------------------------------------- 53 54 BreakIterator* 55 BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status) 56 { 57 char fnbuff[256]; 58 char ext[4]={'\0'}; 59 CharString actualLocale; 60 int32_t size; 61 const UChar* brkfname = NULL; 62 UResourceBundle brkRulesStack; 63 UResourceBundle brkNameStack; 64 UResourceBundle *brkRules = &brkRulesStack; 65 UResourceBundle *brkName = &brkNameStack; 66 RuleBasedBreakIterator *result = NULL; 67 68 if (U_FAILURE(status)) 69 return NULL; 70 71 ures_initStackObject(brkRules); 72 ures_initStackObject(brkName); 73 74 // Get the locale 75 UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); 76 77 // Get the "boundaries" array. 78 if (U_SUCCESS(status)) { 79 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 80 // Get the string object naming the rules file 81 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 82 // Get the actual string 83 brkfname = ures_getString(brkName, &size, &status); 84 U_ASSERT((size_t)size<sizeof(fnbuff)); 85 if ((size_t)size>=sizeof(fnbuff)) { 86 size=0; 87 if (U_SUCCESS(status)) { 88 status = U_BUFFER_OVERFLOW_ERROR; 89 } 90 } 91 92 // Use the string if we found it 93 if (U_SUCCESS(status) && brkfname) { 94 actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); 95 96 UChar* extStart=u_strchr(brkfname, 0x002e); 97 int len = 0; 98 if(extStart!=NULL){ 99 len = (int)(extStart-brkfname); 100 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 101 u_UCharsToChars(brkfname, fnbuff, len); 102 } 103 fnbuff[len]=0; // nul terminate 104 } 105 } 106 107 ures_close(brkRules); 108 ures_close(brkName); 109 110 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 111 if (U_FAILURE(status)) { 112 ures_close(b); 113 return NULL; 114 } 115 116 // Create a RuleBasedBreakIterator 117 result = new RuleBasedBreakIterator(file, status); 118 119 // If there is a result, set the valid locale and actual locale, and the kind 120 if (U_SUCCESS(status) && result != NULL) { 121 U_LOCALE_BASED(locBased, *(BreakIterator*)result); 122 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), 123 actualLocale.data()); 124 } 125 126 ures_close(b); 127 128 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 129 delete result; 130 return NULL; 131 } 132 133 if (result == NULL) { 134 udata_close(file); 135 if (U_SUCCESS(status)) { 136 status = U_MEMORY_ALLOCATION_ERROR; 137 } 138 } 139 140 return result; 141 } 142 143 // Creates a break iterator for word breaks. 144 BreakIterator* U_EXPORT2 145 BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 146 { 147 return createInstance(key, UBRK_WORD, status); 148 } 149 150 // ------------------------------------- 151 152 // Creates a break iterator for line breaks. 153 BreakIterator* U_EXPORT2 154 BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 155 { 156 return createInstance(key, UBRK_LINE, status); 157 } 158 159 // ------------------------------------- 160 161 // Creates a break iterator for character breaks. 162 BreakIterator* U_EXPORT2 163 BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 164 { 165 return createInstance(key, UBRK_CHARACTER, status); 166 } 167 168 // ------------------------------------- 169 170 // Creates a break iterator for sentence breaks. 171 BreakIterator* U_EXPORT2 172 BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 173 { 174 return createInstance(key, UBRK_SENTENCE, status); 175 } 176 177 // ------------------------------------- 178 179 // Creates a break iterator for title casing breaks. 180 BreakIterator* U_EXPORT2 181 BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 182 { 183 return createInstance(key, UBRK_TITLE, status); 184 } 185 186 // ------------------------------------- 187 188 // Gets all the available locales that has localized text boundary data. 189 const Locale* U_EXPORT2 190 BreakIterator::getAvailableLocales(int32_t& count) 191 { 192 return Locale::getAvailableLocales(count); 193 } 194 195 // ------------------------------------------ 196 // 197 // Constructors, destructor and assignment operator 198 // 199 //------------------------------------------- 200 201 BreakIterator::BreakIterator() 202 { 203 *validLocale = *actualLocale = 0; 204 } 205 206 BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { 207 uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); 208 uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); 209 } 210 211 BreakIterator &BreakIterator::operator =(const BreakIterator &other) { 212 if (this != &other) { 213 uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); 214 uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); 215 } 216 return *this; 217 } 218 219 BreakIterator::~BreakIterator() 220 { 221 } 222 223 // ------------------------------------------ 224 // 225 // Registration 226 // 227 //------------------------------------------- 228 #if !UCONFIG_NO_SERVICE 229 230 // ------------------------------------- 231 232 class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 233 public: 234 virtual ~ICUBreakIteratorFactory(); 235 protected: 236 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 237 return BreakIterator::makeInstance(loc, kind, status); 238 } 239 }; 240 241 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 242 243 // ------------------------------------- 244 245 class ICUBreakIteratorService : public ICULocaleService { 246 public: 247 ICUBreakIteratorService() 248 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 249 { 250 UErrorCode status = U_ZERO_ERROR; 251 registerFactory(new ICUBreakIteratorFactory(), status); 252 } 253 254 virtual ~ICUBreakIteratorService(); 255 256 virtual UObject* cloneInstance(UObject* instance) const { 257 return ((BreakIterator*)instance)->clone(); 258 } 259 260 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 261 LocaleKey& lkey = (LocaleKey&)key; 262 int32_t kind = lkey.kind(); 263 Locale loc; 264 lkey.currentLocale(loc); 265 return BreakIterator::makeInstance(loc, kind, status); 266 } 267 268 virtual UBool isDefault() const { 269 return countFactories() == 1; 270 } 271 }; 272 273 ICUBreakIteratorService::~ICUBreakIteratorService() {} 274 275 // ------------------------------------- 276 277 // defined in ucln_cmn.h 278 U_NAMESPACE_END 279 280 static icu::UInitOnce gInitOnceBrkiter; 281 static icu::ICULocaleService* gService = NULL; 282 283 284 285 /** 286 * Release all static memory held by breakiterator. 287 */ 288 U_CDECL_BEGIN 289 static UBool U_CALLCONV breakiterator_cleanup(void) { 290 #if !UCONFIG_NO_SERVICE 291 if (gService) { 292 delete gService; 293 gService = NULL; 294 } 295 gInitOnceBrkiter.reset(); 296 #endif 297 return TRUE; 298 } 299 U_CDECL_END 300 U_NAMESPACE_BEGIN 301 302 static void U_CALLCONV 303 initService(void) { 304 gService = new ICUBreakIteratorService(); 305 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 306 } 307 308 static ICULocaleService* 309 getService(void) 310 { 311 umtx_initOnce(gInitOnceBrkiter, &initService); 312 return gService; 313 } 314 315 316 // ------------------------------------- 317 318 static inline UBool 319 hasService(void) 320 { 321 return !gInitOnceBrkiter.isReset() && getService() != NULL; 322 } 323 324 // ------------------------------------- 325 326 URegistryKey U_EXPORT2 327 BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 328 { 329 ICULocaleService *service = getService(); 330 if (service == NULL) { 331 status = U_MEMORY_ALLOCATION_ERROR; 332 return NULL; 333 } 334 return service->registerInstance(toAdopt, locale, kind, status); 335 } 336 337 // ------------------------------------- 338 339 UBool U_EXPORT2 340 BreakIterator::unregister(URegistryKey key, UErrorCode& status) 341 { 342 if (U_SUCCESS(status)) { 343 if (hasService()) { 344 return gService->unregister(key, status); 345 } 346 status = U_MEMORY_ALLOCATION_ERROR; 347 } 348 return FALSE; 349 } 350 351 // ------------------------------------- 352 353 StringEnumeration* U_EXPORT2 354 BreakIterator::getAvailableLocales(void) 355 { 356 ICULocaleService *service = getService(); 357 if (service == NULL) { 358 return NULL; 359 } 360 return service->getAvailableLocales(); 361 } 362 #endif /* UCONFIG_NO_SERVICE */ 363 364 // ------------------------------------- 365 366 BreakIterator* 367 BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 368 { 369 if (U_FAILURE(status)) { 370 return NULL; 371 } 372 373 #if !UCONFIG_NO_SERVICE 374 if (hasService()) { 375 Locale actualLoc(""); 376 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 377 // TODO: The way the service code works in ICU 2.8 is that if 378 // there is a real registered break iterator, the actualLoc 379 // will be populated, but if the handleDefault path is taken 380 // (because nothing is registered that can handle the 381 // requested locale) then the actualLoc comes back empty. In 382 // that case, the returned object already has its actual/valid 383 // locale data populated (by makeInstance, which is what 384 // handleDefault calls), so we don't touch it. YES, A COMMENT 385 // THIS LONG is a sign of bad code -- so the action item is to 386 // revisit this in ICU 3.0 and clean it up/fix it/remove it. 387 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 388 U_LOCALE_BASED(locBased, *result); 389 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 390 } 391 return result; 392 } 393 else 394 #endif 395 { 396 return makeInstance(loc, kind, status); 397 } 398 } 399 400 // ------------------------------------- 401 enum { kKeyValueLenMax = 32 }; 402 403 BreakIterator* 404 BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 405 { 406 407 if (U_FAILURE(status)) { 408 return NULL; 409 } 410 char lbType[kKeyValueLenMax]; 411 412 BreakIterator *result = NULL; 413 switch (kind) { 414 case UBRK_CHARACTER: 415 result = BreakIterator::buildInstance(loc, "grapheme", status); 416 break; 417 case UBRK_WORD: 418 result = BreakIterator::buildInstance(loc, "word", status); 419 break; 420 case UBRK_LINE: 421 uprv_strcpy(lbType, "line"); 422 { 423 char lbKeyValue[kKeyValueLenMax] = {0}; 424 UErrorCode kvStatus = U_ZERO_ERROR; 425 int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); 426 if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { 427 uprv_strcat(lbType, "_"); 428 uprv_strcat(lbType, lbKeyValue); 429 } 430 } 431 result = BreakIterator::buildInstance(loc, lbType, status); 432 break; 433 case UBRK_SENTENCE: 434 result = BreakIterator::buildInstance(loc, "sentence", status); 435 #if !UCONFIG_NO_FILTERED_BREAK_ITERATION 436 { 437 char ssKeyValue[kKeyValueLenMax] = {0}; 438 UErrorCode kvStatus = U_ZERO_ERROR; 439 int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); 440 if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { 441 FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); 442 if (U_SUCCESS(kvStatus)) { 443 result = fbiBuilder->build(result, status); 444 delete fbiBuilder; 445 } 446 } 447 } 448 #endif 449 break; 450 case UBRK_TITLE: 451 result = BreakIterator::buildInstance(loc, "title", status); 452 break; 453 default: 454 status = U_ILLEGAL_ARGUMENT_ERROR; 455 } 456 457 if (U_FAILURE(status)) { 458 return NULL; 459 } 460 461 return result; 462 } 463 464 Locale 465 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 466 U_LOCALE_BASED(locBased, *this); 467 return locBased.getLocale(type, status); 468 } 469 470 const char * 471 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 472 U_LOCALE_BASED(locBased, *this); 473 return locBased.getLocaleID(type, status); 474 } 475 476 477 // This implementation of getRuleStatus is a do-nothing stub, here to 478 // provide a default implementation for any derived BreakIterator classes that 479 // do not implement it themselves. 480 int32_t BreakIterator::getRuleStatus() const { 481 return 0; 482 } 483 484 // This implementation of getRuleStatusVec is a do-nothing stub, here to 485 // provide a default implementation for any derived BreakIterator classes that 486 // do not implement it themselves. 487 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { 488 if (U_FAILURE(status)) { 489 return 0; 490 } 491 if (capacity < 1) { 492 status = U_BUFFER_OVERFLOW_ERROR; 493 return 1; 494 } 495 *fillInVec = 0; 496 return 1; 497 } 498 499 BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { 500 U_LOCALE_BASED(locBased, (*this)); 501 locBased.setLocaleIDs(valid, actual); 502 } 503 504 U_NAMESPACE_END 505 506 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 507 508 //eof 509