1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File brkiter.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/18/97 aliu Converted from OpenClass. Added DONE. 13 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 14 ***************************************************************************************** 15 */ 16 17 // ***************************************************************************** 18 // This file was generated from the java source file BreakIterator.java 19 // ***************************************************************************** 20 21 #include "unicode/utypes.h" 22 23 #if !UCONFIG_NO_BREAK_ITERATION 24 25 #include "unicode/rbbi.h" 26 #include "unicode/brkiter.h" 27 #include "unicode/udata.h" 28 #include "unicode/ures.h" 29 #include "unicode/ustring.h" 30 #include "unicode/filteredbrk.h" 31 #include "ucln_cmn.h" 32 #include "cstring.h" 33 #include "umutex.h" 34 #include "servloc.h" 35 #include "locbased.h" 36 #include "uresimp.h" 37 #include "uassert.h" 38 #include "ubrkimpl.h" 39 #include "charstr.h" 40 41 // ***************************************************************************** 42 // class BreakIterator 43 // This class implements methods for finding the location of boundaries in text. 44 // Instances of BreakIterator maintain a current position and scan over text 45 // returning the index of characters where boundaries occur. 46 // ***************************************************************************** 47 48 U_NAMESPACE_BEGIN 49 50 // ------------------------------------- 51 52 BreakIterator* 53 BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) 54 { 55 char fnbuff[256]; 56 char ext[4]={'\0'}; 57 CharString actualLocale; 58 int32_t size; 59 const UChar* brkfname = NULL; 60 UResourceBundle brkRulesStack; 61 UResourceBundle brkNameStack; 62 UResourceBundle *brkRules = &brkRulesStack; 63 UResourceBundle *brkName = &brkNameStack; 64 RuleBasedBreakIterator *result = NULL; 65 66 if (U_FAILURE(status)) 67 return NULL; 68 69 ures_initStackObject(brkRules); 70 ures_initStackObject(brkName); 71 72 // Get the locale 73 UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); 74 75 // Get the "boundaries" array. 76 if (U_SUCCESS(status)) { 77 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 78 // Get the string object naming the rules file 79 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 80 // Get the actual string 81 brkfname = ures_getString(brkName, &size, &status); 82 U_ASSERT((size_t)size<sizeof(fnbuff)); 83 if ((size_t)size>=sizeof(fnbuff)) { 84 size=0; 85 if (U_SUCCESS(status)) { 86 status = U_BUFFER_OVERFLOW_ERROR; 87 } 88 } 89 90 // Use the string if we found it 91 if (U_SUCCESS(status) && brkfname) { 92 actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); 93 94 UChar* extStart=u_strchr(brkfname, 0x002e); 95 int len = 0; 96 if(extStart!=NULL){ 97 len = (int)(extStart-brkfname); 98 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 99 u_UCharsToChars(brkfname, fnbuff, len); 100 } 101 fnbuff[len]=0; // nul terminate 102 } 103 } 104 105 ures_close(brkRules); 106 ures_close(brkName); 107 108 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 109 if (U_FAILURE(status)) { 110 ures_close(b); 111 return NULL; 112 } 113 114 // Create a RuleBasedBreakIterator 115 result = new RuleBasedBreakIterator(file, status); 116 117 // If there is a result, set the valid locale and actual locale, and the kind 118 if (U_SUCCESS(status) && result != NULL) { 119 U_LOCALE_BASED(locBased, *(BreakIterator*)result); 120 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), 121 actualLocale.data()); 122 result->setBreakType(kind); 123 } 124 125 ures_close(b); 126 127 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 128 delete result; 129 return NULL; 130 } 131 132 if (result == NULL) { 133 udata_close(file); 134 if (U_SUCCESS(status)) { 135 status = U_MEMORY_ALLOCATION_ERROR; 136 } 137 } 138 139 return result; 140 } 141 142 // Creates a break iterator for word breaks. 143 BreakIterator* U_EXPORT2 144 BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 145 { 146 return createInstance(key, UBRK_WORD, status); 147 } 148 149 // ------------------------------------- 150 151 // Creates a break iterator for line breaks. 152 BreakIterator* U_EXPORT2 153 BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 154 { 155 return createInstance(key, UBRK_LINE, status); 156 } 157 158 // ------------------------------------- 159 160 // Creates a break iterator for character breaks. 161 BreakIterator* U_EXPORT2 162 BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 163 { 164 return createInstance(key, UBRK_CHARACTER, status); 165 } 166 167 // ------------------------------------- 168 169 // Creates a break iterator for sentence breaks. 170 BreakIterator* U_EXPORT2 171 BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 172 { 173 return createInstance(key, UBRK_SENTENCE, status); 174 } 175 176 // ------------------------------------- 177 178 // Creates a break iterator for title casing breaks. 179 BreakIterator* U_EXPORT2 180 BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 181 { 182 return createInstance(key, UBRK_TITLE, status); 183 } 184 185 // ------------------------------------- 186 187 // Gets all the available locales that has localized text boundary data. 188 const Locale* U_EXPORT2 189 BreakIterator::getAvailableLocales(int32_t& count) 190 { 191 return Locale::getAvailableLocales(count); 192 } 193 194 // ------------------------------------------ 195 // 196 // Default constructor and destructor 197 // 198 //------------------------------------------- 199 200 BreakIterator::BreakIterator() 201 { 202 *validLocale = *actualLocale = 0; 203 } 204 205 BreakIterator::~BreakIterator() 206 { 207 } 208 209 // ------------------------------------------ 210 // 211 // Registration 212 // 213 //------------------------------------------- 214 #if !UCONFIG_NO_SERVICE 215 216 // ------------------------------------- 217 218 class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 219 public: 220 virtual ~ICUBreakIteratorFactory(); 221 protected: 222 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 223 return BreakIterator::makeInstance(loc, kind, status); 224 } 225 }; 226 227 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 228 229 // ------------------------------------- 230 231 class ICUBreakIteratorService : public ICULocaleService { 232 public: 233 ICUBreakIteratorService() 234 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 235 { 236 UErrorCode status = U_ZERO_ERROR; 237 registerFactory(new ICUBreakIteratorFactory(), status); 238 } 239 240 virtual ~ICUBreakIteratorService(); 241 242 virtual UObject* cloneInstance(UObject* instance) const { 243 return ((BreakIterator*)instance)->clone(); 244 } 245 246 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 247 LocaleKey& lkey = (LocaleKey&)key; 248 int32_t kind = lkey.kind(); 249 Locale loc; 250 lkey.currentLocale(loc); 251 return BreakIterator::makeInstance(loc, kind, status); 252 } 253 254 virtual UBool isDefault() const { 255 return countFactories() == 1; 256 } 257 }; 258 259 ICUBreakIteratorService::~ICUBreakIteratorService() {} 260 261 // ------------------------------------- 262 263 // defined in ucln_cmn.h 264 U_NAMESPACE_END 265 266 static icu::UInitOnce gInitOnce; 267 static icu::ICULocaleService* gService = NULL; 268 269 270 271 /** 272 * Release all static memory held by breakiterator. 273 */ 274 U_CDECL_BEGIN 275 static UBool U_CALLCONV breakiterator_cleanup(void) { 276 #if !UCONFIG_NO_SERVICE 277 if (gService) { 278 delete gService; 279 gService = NULL; 280 } 281 gInitOnce.reset(); 282 #endif 283 return TRUE; 284 } 285 U_CDECL_END 286 U_NAMESPACE_BEGIN 287 288 static void U_CALLCONV 289 initService(void) { 290 gService = new ICUBreakIteratorService(); 291 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 292 } 293 294 static ICULocaleService* 295 getService(void) 296 { 297 umtx_initOnce(gInitOnce, &initService); 298 return gService; 299 } 300 301 302 // ------------------------------------- 303 304 static inline UBool 305 hasService(void) 306 { 307 return !gInitOnce.isReset() && getService() != NULL; 308 } 309 310 // ------------------------------------- 311 312 URegistryKey U_EXPORT2 313 BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 314 { 315 ICULocaleService *service = getService(); 316 if (service == NULL) { 317 status = U_MEMORY_ALLOCATION_ERROR; 318 return NULL; 319 } 320 return service->registerInstance(toAdopt, locale, kind, status); 321 } 322 323 // ------------------------------------- 324 325 UBool U_EXPORT2 326 BreakIterator::unregister(URegistryKey key, UErrorCode& status) 327 { 328 if (U_SUCCESS(status)) { 329 if (hasService()) { 330 return gService->unregister(key, status); 331 } 332 status = U_MEMORY_ALLOCATION_ERROR; 333 } 334 return FALSE; 335 } 336 337 // ------------------------------------- 338 339 StringEnumeration* U_EXPORT2 340 BreakIterator::getAvailableLocales(void) 341 { 342 ICULocaleService *service = getService(); 343 if (service == NULL) { 344 return NULL; 345 } 346 return service->getAvailableLocales(); 347 } 348 #endif /* UCONFIG_NO_SERVICE */ 349 350 // ------------------------------------- 351 352 BreakIterator* 353 BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 354 { 355 if (U_FAILURE(status)) { 356 return NULL; 357 } 358 359 #if !UCONFIG_NO_SERVICE 360 if (hasService()) { 361 Locale actualLoc(""); 362 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 363 // TODO: The way the service code works in ICU 2.8 is that if 364 // there is a real registered break iterator, the actualLoc 365 // will be populated, but if the handleDefault path is taken 366 // (because nothing is registered that can handle the 367 // requested locale) then the actualLoc comes back empty. In 368 // that case, the returned object already has its actual/valid 369 // locale data populated (by makeInstance, which is what 370 // handleDefault calls), so we don't touch it. YES, A COMMENT 371 // THIS LONG is a sign of bad code -- so the action item is to 372 // revisit this in ICU 3.0 and clean it up/fix it/remove it. 373 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 374 U_LOCALE_BASED(locBased, *result); 375 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 376 } 377 return result; 378 } 379 else 380 #endif 381 { 382 return makeInstance(loc, kind, status); 383 } 384 } 385 386 // ------------------------------------- 387 enum { kKeyValueLenMax = 32 }; 388 389 BreakIterator* 390 BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 391 { 392 393 if (U_FAILURE(status)) { 394 return NULL; 395 } 396 char lbType[kKeyValueLenMax]; 397 398 BreakIterator *result = NULL; 399 switch (kind) { 400 case UBRK_CHARACTER: 401 result = BreakIterator::buildInstance(loc, "grapheme", kind, status); 402 break; 403 case UBRK_WORD: 404 result = BreakIterator::buildInstance(loc, "word", kind, status); 405 break; 406 case UBRK_LINE: 407 uprv_strcpy(lbType, "line"); 408 { 409 char lbKeyValue[kKeyValueLenMax] = {0}; 410 UErrorCode kvStatus = U_ZERO_ERROR; 411 int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); 412 if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { 413 uprv_strcat(lbType, "_"); 414 uprv_strcat(lbType, lbKeyValue); 415 } 416 } 417 result = BreakIterator::buildInstance(loc, lbType, kind, status); 418 break; 419 case UBRK_SENTENCE: 420 result = BreakIterator::buildInstance(loc, "sentence", kind, status); 421 { 422 char ssKeyValue[kKeyValueLenMax] = {0}; 423 UErrorCode kvStatus = U_ZERO_ERROR; 424 int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); 425 if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { 426 FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); 427 if (U_SUCCESS(kvStatus)) { 428 result = fbiBuilder->build(result, status); 429 delete fbiBuilder; 430 } 431 } 432 } 433 break; 434 case UBRK_TITLE: 435 result = BreakIterator::buildInstance(loc, "title", kind, status); 436 break; 437 default: 438 status = U_ILLEGAL_ARGUMENT_ERROR; 439 } 440 441 if (U_FAILURE(status)) { 442 return NULL; 443 } 444 445 return result; 446 } 447 448 Locale 449 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 450 U_LOCALE_BASED(locBased, *this); 451 return locBased.getLocale(type, status); 452 } 453 454 const char * 455 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 456 U_LOCALE_BASED(locBased, *this); 457 return locBased.getLocaleID(type, status); 458 } 459 460 461 // This implementation of getRuleStatus is a do-nothing stub, here to 462 // provide a default implementation for any derived BreakIterator classes that 463 // do not implement it themselves. 464 int32_t BreakIterator::getRuleStatus() const { 465 return 0; 466 } 467 468 // This implementation of getRuleStatusVec is a do-nothing stub, here to 469 // provide a default implementation for any derived BreakIterator classes that 470 // do not implement it themselves. 471 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { 472 if (U_FAILURE(status)) { 473 return 0; 474 } 475 if (capacity < 1) { 476 status = U_BUFFER_OVERFLOW_ERROR; 477 return 1; 478 } 479 *fillInVec = 0; 480 return 1; 481 } 482 483 BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { 484 U_LOCALE_BASED(locBased, (*this)); 485 locBased.setLocaleIDs(valid, actual); 486 } 487 488 U_NAMESPACE_END 489 490 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 491 492 //eof 493