Home | History | Annotate | Download | only in common
      1 /**
      2  ************************************************************************************
      3  * Copyright (C) 2006-2009,2011, International Business Machines Corporation        *
      4  * and others. All Rights Reserved.                                                 *
      5  ************************************************************************************
      6  */
      7 
      8 #include "unicode/utypes.h"
      9 
     10 #if !UCONFIG_NO_BREAK_ITERATION
     11 
     12 #include "brkeng.h"
     13 #include "dictbe.h"
     14 #include "triedict.h"
     15 #include "unicode/uchar.h"
     16 #include "unicode/uniset.h"
     17 #include "unicode/chariter.h"
     18 #include "unicode/ures.h"
     19 #include "unicode/udata.h"
     20 #include "unicode/putil.h"
     21 #include "unicode/ustring.h"
     22 #include "unicode/uscript.h"
     23 #include "uvector.h"
     24 #include "umutex.h"
     25 #include "uresimp.h"
     26 #include "ubrkimpl.h"
     27 
     28 U_NAMESPACE_BEGIN
     29 
     30 /*
     31  ******************************************************************
     32  */
     33 
     34 LanguageBreakEngine::LanguageBreakEngine() {
     35 }
     36 
     37 LanguageBreakEngine::~LanguageBreakEngine() {
     38 }
     39 
     40 /*
     41  ******************************************************************
     42  */
     43 
     44 LanguageBreakFactory::LanguageBreakFactory() {
     45 }
     46 
     47 LanguageBreakFactory::~LanguageBreakFactory() {
     48 }
     49 
     50 /*
     51  ******************************************************************
     52  */
     53 
     54 UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
     55     for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
     56         fHandled[i] = 0;
     57     }
     58 }
     59 
     60 UnhandledEngine::~UnhandledEngine() {
     61     for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
     62         if (fHandled[i] != 0) {
     63             delete fHandled[i];
     64         }
     65     }
     66 }
     67 
     68 UBool
     69 UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
     70     return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
     71         && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
     72 }
     73 
     74 int32_t
     75 UnhandledEngine::findBreaks( UText *text,
     76                                  int32_t startPos,
     77                                  int32_t endPos,
     78                                  UBool reverse,
     79                                  int32_t breakType,
     80                                  UStack &/*foundBreaks*/ ) const {
     81     if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
     82         UChar32 c = utext_current32(text);
     83         if (reverse) {
     84             while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
     85                 c = utext_previous32(text);
     86             }
     87         }
     88         else {
     89             while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
     90                 utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
     91                 c = utext_current32(text);
     92             }
     93         }
     94     }
     95     return 0;
     96 }
     97 
     98 void
     99 UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
    100     if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
    101         if (fHandled[breakType] == 0) {
    102             fHandled[breakType] = new UnicodeSet();
    103             if (fHandled[breakType] == 0) {
    104                 return;
    105             }
    106         }
    107         if (!fHandled[breakType]->contains(c)) {
    108             UErrorCode status = U_ZERO_ERROR;
    109             // Apply the entire script of the character.
    110             int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
    111             fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
    112         }
    113     }
    114 }
    115 
    116 /*
    117  ******************************************************************
    118  */
    119 
    120 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
    121     fEngines = 0;
    122 }
    123 
    124 ICULanguageBreakFactory::~ICULanguageBreakFactory() {
    125     if (fEngines != 0) {
    126         delete fEngines;
    127     }
    128 }
    129 
    130 U_NAMESPACE_END
    131 U_CDECL_BEGIN
    132 static void U_CALLCONV _deleteEngine(void *obj) {
    133     delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj;
    134 }
    135 U_CDECL_END
    136 U_NAMESPACE_BEGIN
    137 
    138 const LanguageBreakEngine *
    139 ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
    140     UBool       needsInit;
    141     int32_t     i;
    142     const LanguageBreakEngine *lbe = NULL;
    143     UErrorCode  status = U_ZERO_ERROR;
    144 
    145     // TODO: The global mutex should not be used.
    146     // The global mutex should only be used for short periods.
    147     // A ICULanguageBreakFactory specific mutex should be used.
    148     umtx_lock(NULL);
    149     needsInit = (UBool)(fEngines == NULL);
    150     if (!needsInit) {
    151         i = fEngines->size();
    152         while (--i >= 0) {
    153             lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
    154             if (lbe != NULL && lbe->handles(c, breakType)) {
    155                 break;
    156             }
    157             lbe = NULL;
    158         }
    159     }
    160     umtx_unlock(NULL);
    161 
    162     if (lbe != NULL) {
    163         return lbe;
    164     }
    165 
    166     if (needsInit) {
    167         UStack  *engines = new UStack(_deleteEngine, NULL, status);
    168         if (U_SUCCESS(status) && engines == NULL) {
    169             status = U_MEMORY_ALLOCATION_ERROR;
    170         }
    171         else if (U_FAILURE(status)) {
    172             delete engines;
    173             engines = NULL;
    174         }
    175         else {
    176             umtx_lock(NULL);
    177             if (fEngines == NULL) {
    178                 fEngines = engines;
    179                 engines = NULL;
    180             }
    181             umtx_unlock(NULL);
    182             delete engines;
    183         }
    184     }
    185 
    186     if (fEngines == NULL) {
    187         return NULL;
    188     }
    189 
    190     // We didn't find an engine the first time through, or there was no
    191     // stack. Create an engine.
    192     const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
    193 
    194     // Now get the lock, and see if someone else has created it in the
    195     // meantime
    196     umtx_lock(NULL);
    197     i = fEngines->size();
    198     while (--i >= 0) {
    199         lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
    200         if (lbe != NULL && lbe->handles(c, breakType)) {
    201             break;
    202         }
    203         lbe = NULL;
    204     }
    205     if (lbe == NULL && newlbe != NULL) {
    206         fEngines->push((void *)newlbe, status);
    207         lbe = newlbe;
    208         newlbe = NULL;
    209     }
    210     umtx_unlock(NULL);
    211 
    212     delete newlbe;
    213 
    214     return lbe;
    215 }
    216 
    217 const LanguageBreakEngine *
    218 ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
    219     UErrorCode status = U_ZERO_ERROR;
    220     UScriptCode code = uscript_getScript(c, &status);
    221     if (U_SUCCESS(status)) {
    222         const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType);
    223         if (dict != NULL) {
    224             const LanguageBreakEngine *engine = NULL;
    225             switch(code) {
    226             case USCRIPT_THAI:
    227                 engine = new ThaiBreakEngine(dict, status);
    228                 break;
    229             case USCRIPT_KHMER:
    230                 engine = new KhmerBreakEngine(dict, status);
    231                 break;
    232             default:
    233                 break;
    234             }
    235             if (engine == NULL) {
    236                 delete dict;
    237             }
    238             else if (U_FAILURE(status)) {
    239                 delete engine;
    240                 engine = NULL;
    241             }
    242             return engine;
    243         }
    244     }
    245     return NULL;
    246 }
    247 
    248 const CompactTrieDictionary *
    249 ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
    250     UErrorCode status = U_ZERO_ERROR;
    251     // Open root from brkitr tree.
    252     char dictnbuff[256];
    253     char ext[4]={'\0'};
    254 
    255     UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
    256     b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
    257     b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
    258     int32_t dictnlength = 0;
    259     const UChar *dictfname = ures_getString(b, &dictnlength, &status);
    260     if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
    261         dictnlength = 0;
    262         status = U_BUFFER_OVERFLOW_ERROR;
    263     }
    264     if (U_SUCCESS(status) && dictfname) {
    265         UChar* extStart=u_strchr(dictfname, 0x002e);
    266         int len = 0;
    267         if(extStart!=NULL){
    268             len = (int)(extStart-dictfname);
    269             u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
    270             u_UCharsToChars(dictfname, dictnbuff, len);
    271         }
    272         dictnbuff[len]=0; // nul terminate
    273     }
    274     ures_close(b);
    275     UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
    276     if (U_SUCCESS(status)) {
    277         const CompactTrieDictionary *dict = new CompactTrieDictionary(
    278             file, status);
    279         if (U_SUCCESS(status) && dict == NULL) {
    280             status = U_MEMORY_ALLOCATION_ERROR;
    281         }
    282         if (U_FAILURE(status)) {
    283             delete dict;
    284             dict = NULL;
    285         }
    286         return dict;
    287     }
    288     return NULL;
    289 }
    290 
    291 U_NAMESPACE_END
    292 
    293 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    294