1 /* 2 ******************************************************************************* 3 * Copyright (C) 1997-2008, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 * File TXTBDRY.CPP 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 02/18/97 aliu Converted from OpenClass. Added DONE. 13 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. 14 ***************************************************************************************** 15 */ 16 17 // ***************************************************************************** 18 // This file was generated from the java source file BreakIterator.java 19 // ***************************************************************************** 20 21 #include "unicode/utypes.h" 22 23 #if !UCONFIG_NO_BREAK_ITERATION 24 25 #include "unicode/rbbi.h" 26 #include "unicode/brkiter.h" 27 #include "unicode/udata.h" 28 #include "unicode/ures.h" 29 #include "unicode/ustring.h" 30 #include "ucln_cmn.h" 31 #include "cstring.h" 32 #include "umutex.h" 33 #include "servloc.h" 34 #include "locbased.h" 35 #include "uresimp.h" 36 #include "uassert.h" 37 #include "ubrkimpl.h" 38 39 // ***************************************************************************** 40 // class BreakIterator 41 // This class implements methods for finding the location of boundaries in text. 42 // Instances of BreakIterator maintain a current position and scan over text 43 // returning the index of characters where boundaries occur. 44 // ***************************************************************************** 45 46 U_NAMESPACE_BEGIN 47 48 // ------------------------------------- 49 50 BreakIterator* 51 BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) 52 { 53 char fnbuff[256]; 54 char ext[4]={'\0'}; 55 char actualLocale[ULOC_FULLNAME_CAPACITY]; 56 int32_t size; 57 const UChar* brkfname = NULL; 58 UResourceBundle brkRulesStack; 59 UResourceBundle brkNameStack; 60 UResourceBundle *brkRules = &brkRulesStack; 61 UResourceBundle *brkName = &brkNameStack; 62 RuleBasedBreakIterator *result = NULL; 63 64 if (U_FAILURE(status)) 65 return NULL; 66 67 ures_initStackObject(brkRules); 68 ures_initStackObject(brkName); 69 70 // Get the locale 71 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status); 72 /* this is a hack for now. Should be fixed when the data is fetched from 73 brk_index.txt */ 74 if(status==U_USING_DEFAULT_WARNING){ 75 status=U_ZERO_ERROR; 76 ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status); 77 } 78 79 // Get the "boundaries" array. 80 if (U_SUCCESS(status)) { 81 brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); 82 // Get the string object naming the rules file 83 brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); 84 // Get the actual string 85 brkfname = ures_getString(brkName, &size, &status); 86 U_ASSERT((size_t)size<sizeof(fnbuff)); 87 if ((size_t)size>=sizeof(fnbuff)) { 88 size=0; 89 if (U_SUCCESS(status)) { 90 status = U_BUFFER_OVERFLOW_ERROR; 91 } 92 } 93 94 // Use the string if we found it 95 if (U_SUCCESS(status) && brkfname) { 96 uprv_strncpy(actualLocale, 97 ures_getLocale(brkName, &status), 98 sizeof(actualLocale)/sizeof(actualLocale[0])); 99 100 UChar* extStart=u_strchr(brkfname, 0x002e); 101 int len = 0; 102 if(extStart!=NULL){ 103 len = extStart-brkfname; 104 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 105 u_UCharsToChars(brkfname, fnbuff, len); 106 } 107 fnbuff[len]=0; // nul terminate 108 } 109 } 110 111 ures_close(brkRules); 112 ures_close(brkName); 113 114 UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); 115 if (U_FAILURE(status)) { 116 ures_close(b); 117 return NULL; 118 } 119 120 // Create a RuleBasedBreakIterator 121 result = new RuleBasedBreakIterator(file, status); 122 123 // If there is a result, set the valid locale and actual locale, and the kind 124 if (U_SUCCESS(status) && result != NULL) { 125 U_LOCALE_BASED(locBased, *(BreakIterator*)result); 126 locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); 127 result->setBreakType(kind); 128 } 129 130 ures_close(b); 131 132 if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple 133 delete result; 134 return NULL; 135 } 136 137 if (result == NULL) { 138 udata_close(file); 139 if (U_SUCCESS(status)) { 140 status = U_MEMORY_ALLOCATION_ERROR; 141 } 142 } 143 144 return result; 145 } 146 147 // Creates a break iterator for word breaks. 148 BreakIterator* U_EXPORT2 149 BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) 150 { 151 return createInstance(key, UBRK_WORD, status); 152 } 153 154 // ------------------------------------- 155 156 // Creates a break iterator for line breaks. 157 BreakIterator* U_EXPORT2 158 BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) 159 { 160 return createInstance(key, UBRK_LINE, status); 161 } 162 163 // ------------------------------------- 164 165 // Creates a break iterator for character breaks. 166 BreakIterator* U_EXPORT2 167 BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) 168 { 169 return createInstance(key, UBRK_CHARACTER, status); 170 } 171 172 // ------------------------------------- 173 174 // Creates a break iterator for sentence breaks. 175 BreakIterator* U_EXPORT2 176 BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) 177 { 178 return createInstance(key, UBRK_SENTENCE, status); 179 } 180 181 // ------------------------------------- 182 183 // Creates a break iterator for title casing breaks. 184 BreakIterator* U_EXPORT2 185 BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) 186 { 187 return createInstance(key, UBRK_TITLE, status); 188 } 189 190 // ------------------------------------- 191 192 // Gets all the available locales that has localized text boundary data. 193 const Locale* U_EXPORT2 194 BreakIterator::getAvailableLocales(int32_t& count) 195 { 196 return Locale::getAvailableLocales(count); 197 } 198 199 // ------------------------------------- 200 // Gets the objectLocale display name in the default locale language. 201 UnicodeString& U_EXPORT2 202 BreakIterator::getDisplayName(const Locale& objectLocale, 203 UnicodeString& name) 204 { 205 return objectLocale.getDisplayName(name); 206 } 207 208 // ------------------------------------- 209 // Gets the objectLocale display name in the displayLocale language. 210 UnicodeString& U_EXPORT2 211 BreakIterator::getDisplayName(const Locale& objectLocale, 212 const Locale& displayLocale, 213 UnicodeString& name) 214 { 215 return objectLocale.getDisplayName(displayLocale, name); 216 } 217 218 // ------------------------------------------ 219 // 220 // Default constructor and destructor 221 // 222 //------------------------------------------- 223 224 BreakIterator::BreakIterator() 225 { 226 fBufferClone = FALSE; 227 *validLocale = *actualLocale = 0; 228 } 229 230 BreakIterator::~BreakIterator() 231 { 232 } 233 234 // ------------------------------------------ 235 // 236 // Registration 237 // 238 //------------------------------------------- 239 #if !UCONFIG_NO_SERVICE 240 241 // ------------------------------------- 242 243 class ICUBreakIteratorFactory : public ICUResourceBundleFactory { 244 protected: 245 virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { 246 return BreakIterator::makeInstance(loc, kind, status); 247 } 248 }; 249 250 // ------------------------------------- 251 252 class ICUBreakIteratorService : public ICULocaleService { 253 public: 254 ICUBreakIteratorService() 255 : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 256 { 257 UErrorCode status = U_ZERO_ERROR; 258 registerFactory(new ICUBreakIteratorFactory(), status); 259 } 260 261 virtual UObject* cloneInstance(UObject* instance) const { 262 return ((BreakIterator*)instance)->clone(); 263 } 264 265 virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { 266 LocaleKey& lkey = (LocaleKey&)key; 267 int32_t kind = lkey.kind(); 268 Locale loc; 269 lkey.currentLocale(loc); 270 return BreakIterator::makeInstance(loc, kind, status); 271 } 272 273 virtual UBool isDefault() const { 274 return countFactories() == 1; 275 } 276 }; 277 278 // ------------------------------------- 279 280 U_NAMESPACE_END 281 282 // defined in ucln_cmn.h 283 284 static U_NAMESPACE_QUALIFIER ICULocaleService* gService = NULL; 285 286 /** 287 * Release all static memory held by breakiterator. 288 */ 289 U_CDECL_BEGIN 290 static UBool U_CALLCONV breakiterator_cleanup(void) { 291 #if !UCONFIG_NO_SERVICE 292 if (gService) { 293 delete gService; 294 gService = NULL; 295 } 296 #endif 297 return TRUE; 298 } 299 U_CDECL_END 300 U_NAMESPACE_BEGIN 301 302 static ICULocaleService* 303 getService(void) 304 { 305 UBool needsInit; 306 UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit); 307 308 if (needsInit) { 309 ICULocaleService *tService = new ICUBreakIteratorService(); 310 umtx_lock(NULL); 311 if (gService == NULL) { 312 gService = tService; 313 tService = NULL; 314 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); 315 } 316 umtx_unlock(NULL); 317 delete tService; 318 } 319 return gService; 320 } 321 322 // ------------------------------------- 323 324 static inline UBool 325 hasService(void) 326 { 327 UBool retVal; 328 UMTX_CHECK(NULL, gService != NULL, retVal); 329 return retVal; 330 } 331 332 // ------------------------------------- 333 334 URegistryKey U_EXPORT2 335 BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) 336 { 337 ICULocaleService *service = getService(); 338 if (service == NULL) { 339 status = U_MEMORY_ALLOCATION_ERROR; 340 return NULL; 341 } 342 return service->registerInstance(toAdopt, locale, kind, status); 343 } 344 345 // ------------------------------------- 346 347 UBool U_EXPORT2 348 BreakIterator::unregister(URegistryKey key, UErrorCode& status) 349 { 350 if (U_SUCCESS(status)) { 351 if (hasService()) { 352 return gService->unregister(key, status); 353 } 354 status = U_MEMORY_ALLOCATION_ERROR; 355 } 356 return FALSE; 357 } 358 359 // ------------------------------------- 360 361 StringEnumeration* U_EXPORT2 362 BreakIterator::getAvailableLocales(void) 363 { 364 ICULocaleService *service = getService(); 365 if (service == NULL) { 366 return NULL; 367 } 368 return service->getAvailableLocales(); 369 } 370 #endif /* UCONFIG_NO_SERVICE */ 371 372 // ------------------------------------- 373 374 BreakIterator* 375 BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) 376 { 377 if (U_FAILURE(status)) { 378 return NULL; 379 } 380 381 u_init(&status); 382 #if !UCONFIG_NO_SERVICE 383 if (hasService()) { 384 Locale actualLoc(""); 385 BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); 386 // TODO: The way the service code works in ICU 2.8 is that if 387 // there is a real registered break iterator, the actualLoc 388 // will be populated, but if the handleDefault path is taken 389 // (because nothing is registered that can handle the 390 // requested locale) then the actualLoc comes back empty. In 391 // that case, the returned object already has its actual/valid 392 // locale data populated (by makeInstance, which is what 393 // handleDefault calls), so we don't touch it. YES, A COMMENT 394 // THIS LONG is a sign of bad code -- so the action item is to 395 // revisit this in ICU 3.0 and clean it up/fix it/remove it. 396 if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { 397 U_LOCALE_BASED(locBased, *result); 398 locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); 399 } 400 return result; 401 } 402 else 403 #endif 404 { 405 return makeInstance(loc, kind, status); 406 } 407 } 408 409 // ------------------------------------- 410 411 BreakIterator* 412 BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) 413 { 414 415 if (U_FAILURE(status)) { 416 return NULL; 417 } 418 419 BreakIterator *result = NULL; 420 switch (kind) { 421 case UBRK_CHARACTER: 422 result = BreakIterator::buildInstance(loc, "grapheme", kind, status); 423 break; 424 case UBRK_WORD: 425 result = BreakIterator::buildInstance(loc, "word", kind, status); 426 break; 427 case UBRK_LINE: 428 result = BreakIterator::buildInstance(loc, "line", kind, status); 429 break; 430 case UBRK_SENTENCE: 431 result = BreakIterator::buildInstance(loc, "sentence", kind, status); 432 break; 433 case UBRK_TITLE: 434 result = BreakIterator::buildInstance(loc, "title", kind, status); 435 break; 436 default: 437 status = U_ILLEGAL_ARGUMENT_ERROR; 438 } 439 440 if (U_FAILURE(status)) { 441 return NULL; 442 } 443 444 return result; 445 } 446 447 Locale 448 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { 449 U_LOCALE_BASED(locBased, *this); 450 return locBased.getLocale(type, status); 451 } 452 453 const char * 454 BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { 455 U_LOCALE_BASED(locBased, *this); 456 return locBased.getLocaleID(type, status); 457 } 458 459 U_NAMESPACE_END 460 461 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 462 463 //eof 464