1 /** 2 ************************************************************************************ 3 * Copyright (C) 2006-2009, International Business Machines Corporation and others. * 4 * All Rights Reserved. * 5 ************************************************************************************ 6 */ 7 8 #include "unicode/utypes.h" 9 10 #if !UCONFIG_NO_BREAK_ITERATION 11 12 #include "brkeng.h" 13 #include "dictbe.h" 14 #include "triedict.h" 15 #include "unicode/uchar.h" 16 #include "unicode/uniset.h" 17 #include "unicode/chariter.h" 18 #include "unicode/ures.h" 19 #include "unicode/udata.h" 20 #include "unicode/putil.h" 21 #include "unicode/ustring.h" 22 #include "unicode/uscript.h" 23 #include "uvector.h" 24 #include "umutex.h" 25 #include "uresimp.h" 26 #include "ubrkimpl.h" 27 28 U_NAMESPACE_BEGIN 29 30 /* 31 ****************************************************************** 32 */ 33 34 LanguageBreakEngine::LanguageBreakEngine() { 35 } 36 37 LanguageBreakEngine::~LanguageBreakEngine() { 38 } 39 40 /* 41 ****************************************************************** 42 */ 43 44 LanguageBreakFactory::LanguageBreakFactory() { 45 } 46 47 LanguageBreakFactory::~LanguageBreakFactory() { 48 } 49 50 /* 51 ****************************************************************** 52 */ 53 54 UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { 55 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 56 fHandled[i] = 0; 57 } 58 } 59 60 UnhandledEngine::~UnhandledEngine() { 61 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { 62 if (fHandled[i] != 0) { 63 delete fHandled[i]; 64 } 65 } 66 } 67 68 UBool 69 UnhandledEngine::handles(UChar32 c, int32_t breakType) const { 70 return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) 71 && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); 72 } 73 74 int32_t 75 UnhandledEngine::findBreaks( UText *text, 76 int32_t startPos, 77 int32_t endPos, 78 UBool reverse, 79 int32_t breakType, 80 UStack &/*foundBreaks*/ ) const { 81 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 82 UChar32 c = utext_current32(text); 83 if (reverse) { 84 while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { 85 c = utext_previous32(text); 86 } 87 } 88 else { 89 while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { 90 utext_next32(text); // TODO: recast loop to work with post-increment operations. 91 c = utext_current32(text); 92 } 93 } 94 } 95 return 0; 96 } 97 98 void 99 UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { 100 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { 101 if (fHandled[breakType] == 0) { 102 fHandled[breakType] = new UnicodeSet(); 103 if (fHandled[breakType] == 0) { 104 return; 105 } 106 } 107 if (!fHandled[breakType]->contains(c)) { 108 UErrorCode status = U_ZERO_ERROR; 109 // Apply the entire script of the character. 110 int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); 111 fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); 112 } 113 } 114 } 115 116 /* 117 ****************************************************************** 118 */ 119 120 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { 121 fEngines = 0; 122 } 123 124 ICULanguageBreakFactory::~ICULanguageBreakFactory() { 125 if (fEngines != 0) { 126 delete fEngines; 127 } 128 } 129 130 U_NAMESPACE_END 131 U_CDECL_BEGIN 132 static void U_CALLCONV _deleteEngine(void *obj) { 133 delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj; 134 } 135 U_CDECL_END 136 U_NAMESPACE_BEGIN 137 138 const LanguageBreakEngine * 139 ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { 140 UBool needsInit; 141 int32_t i; 142 const LanguageBreakEngine *lbe = NULL; 143 UErrorCode status = U_ZERO_ERROR; 144 145 // TODO: The global mutex should not be used. 146 // The global mutex should only be used for short periods. 147 // A ICULanguageBreakFactory specific mutex should be used. 148 umtx_lock(NULL); 149 needsInit = (UBool)(fEngines == NULL); 150 if (!needsInit) { 151 i = fEngines->size(); 152 while (--i >= 0) { 153 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 154 if (lbe != NULL && lbe->handles(c, breakType)) { 155 break; 156 } 157 lbe = NULL; 158 } 159 } 160 umtx_unlock(NULL); 161 162 if (lbe != NULL) { 163 return lbe; 164 } 165 166 if (needsInit) { 167 UStack *engines = new UStack(_deleteEngine, NULL, status); 168 if (U_SUCCESS(status) && engines == NULL) { 169 status = U_MEMORY_ALLOCATION_ERROR; 170 } 171 else if (U_FAILURE(status)) { 172 delete engines; 173 engines = NULL; 174 } 175 else { 176 umtx_lock(NULL); 177 if (fEngines == NULL) { 178 fEngines = engines; 179 engines = NULL; 180 } 181 umtx_unlock(NULL); 182 delete engines; 183 } 184 } 185 186 if (fEngines == NULL) { 187 return NULL; 188 } 189 190 // We didn't find an engine the first time through, or there was no 191 // stack. Create an engine. 192 const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); 193 194 // Now get the lock, and see if someone else has created it in the 195 // meantime 196 umtx_lock(NULL); 197 i = fEngines->size(); 198 while (--i >= 0) { 199 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 200 if (lbe != NULL && lbe->handles(c, breakType)) { 201 break; 202 } 203 lbe = NULL; 204 } 205 if (lbe == NULL && newlbe != NULL) { 206 fEngines->push((void *)newlbe, status); 207 lbe = newlbe; 208 newlbe = NULL; 209 } 210 umtx_unlock(NULL); 211 212 delete newlbe; 213 214 return lbe; 215 } 216 217 const LanguageBreakEngine * 218 ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { 219 UErrorCode status = U_ZERO_ERROR; 220 UScriptCode code = uscript_getScript(c, &status); 221 if (U_SUCCESS(status)) { 222 const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType); 223 if (dict != NULL) { 224 const LanguageBreakEngine *engine = NULL; 225 switch(code) { 226 case USCRIPT_THAI: 227 engine = new ThaiBreakEngine(dict, status); 228 break; 229 default: 230 break; 231 } 232 if (engine == NULL) { 233 delete dict; 234 } 235 else if (U_FAILURE(status)) { 236 delete engine; 237 engine = NULL; 238 } 239 return engine; 240 } 241 } 242 return NULL; 243 } 244 245 const CompactTrieDictionary * 246 ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) { 247 UErrorCode status = U_ZERO_ERROR; 248 // Open root from brkitr tree. 249 char dictnbuff[256]; 250 char ext[4]={'\0'}; 251 252 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); 253 b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); 254 b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status); 255 int32_t dictnlength = 0; 256 const UChar *dictfname = ures_getString(b, &dictnlength, &status); 257 if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { 258 dictnlength = 0; 259 status = U_BUFFER_OVERFLOW_ERROR; 260 } 261 if (U_SUCCESS(status) && dictfname) { 262 UChar* extStart=u_strchr(dictfname, 0x002e); 263 int len = 0; 264 if(extStart!=NULL){ 265 len = (int)(extStart-dictfname); 266 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff 267 u_UCharsToChars(dictfname, dictnbuff, len); 268 } 269 dictnbuff[len]=0; // nul terminate 270 } 271 ures_close(b); 272 UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); 273 if (U_SUCCESS(status)) { 274 const CompactTrieDictionary *dict = new CompactTrieDictionary( 275 file, status); 276 if (U_SUCCESS(status) && dict == NULL) { 277 status = U_MEMORY_ALLOCATION_ERROR; 278 } 279 if (U_FAILURE(status)) { 280 delete dict; 281 dict = NULL; 282 } 283 return dict; 284 } 285 return NULL; 286 } 287 288 U_NAMESPACE_END 289 290 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 291