Home | History | Annotate | Download | only in common
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ********************************************************************************
      5 *   Copyright (C) 1996-2015, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ********************************************************************************
      8 */
      9 
     10 #include "unicode/utypes.h"
     11 
     12 #if !UCONFIG_NO_BREAK_ITERATION
     13 
     14 #include "unicode/ubrk.h"
     15 
     16 #include "unicode/brkiter.h"
     17 #include "unicode/uloc.h"
     18 #include "unicode/ustring.h"
     19 #include "unicode/uchriter.h"
     20 #include "unicode/rbbi.h"
     21 #include "rbbirb.h"
     22 #include "uassert.h"
     23 
     24 U_NAMESPACE_USE
     25 
     26 //------------------------------------------------------------------------------
     27 //
     28 //    ubrk_open      Create a canned type of break iterator based on type (word, line, etc.)
     29 //                   and locale.
     30 //
     31 //------------------------------------------------------------------------------
     32 U_CAPI UBreakIterator* U_EXPORT2
     33 ubrk_open(UBreakIteratorType type,
     34       const char *locale,
     35       const UChar *text,
     36       int32_t textLength,
     37       UErrorCode *status)
     38 {
     39 
     40   if(U_FAILURE(*status)) return 0;
     41 
     42   BreakIterator *result = 0;
     43 
     44   switch(type) {
     45 
     46   case UBRK_CHARACTER:
     47     result = BreakIterator::createCharacterInstance(Locale(locale), *status);
     48     break;
     49 
     50   case UBRK_WORD:
     51     result = BreakIterator::createWordInstance(Locale(locale), *status);
     52     break;
     53 
     54   case UBRK_LINE:
     55     result = BreakIterator::createLineInstance(Locale(locale), *status);
     56     break;
     57 
     58   case UBRK_SENTENCE:
     59     result = BreakIterator::createSentenceInstance(Locale(locale), *status);
     60     break;
     61 
     62   case UBRK_TITLE:
     63     result = BreakIterator::createTitleInstance(Locale(locale), *status);
     64     break;
     65 
     66   default:
     67     *status = U_ILLEGAL_ARGUMENT_ERROR;
     68   }
     69 
     70   // check for allocation error
     71   if (U_FAILURE(*status)) {
     72      return 0;
     73   }
     74   if(result == 0) {
     75     *status = U_MEMORY_ALLOCATION_ERROR;
     76     return 0;
     77   }
     78 
     79 
     80   UBreakIterator *uBI = (UBreakIterator *)result;
     81   if (text != NULL) {
     82       ubrk_setText(uBI, text, textLength, status);
     83   }
     84   return uBI;
     85 }
     86 
     87 
     88 
     89 //------------------------------------------------------------------------------
     90 //
     91 //   ubrk_openRules      open a break iterator from a set of break rules.
     92 //                       Invokes the rule builder.
     93 //
     94 //------------------------------------------------------------------------------
     95 U_CAPI UBreakIterator* U_EXPORT2
     96 ubrk_openRules(  const UChar        *rules,
     97                        int32_t       rulesLength,
     98                  const UChar        *text,
     99                        int32_t       textLength,
    100                        UParseError  *parseErr,
    101                        UErrorCode   *status)  {
    102 
    103     if (status == NULL || U_FAILURE(*status)){
    104         return 0;
    105     }
    106 
    107     BreakIterator *result = 0;
    108     UnicodeString ruleString(rules, rulesLength);
    109     result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
    110     if(U_FAILURE(*status)) {
    111         return 0;
    112     }
    113 
    114     UBreakIterator *uBI = (UBreakIterator *)result;
    115     if (text != NULL) {
    116         ubrk_setText(uBI, text, textLength, status);
    117     }
    118     return uBI;
    119 }
    120 
    121 
    122 
    123 
    124 
    125 U_CAPI UBreakIterator * U_EXPORT2
    126 ubrk_safeClone(
    127           const UBreakIterator *bi,
    128           void * /*stackBuffer*/,
    129           int32_t *pBufferSize,
    130           UErrorCode *status)
    131 {
    132     if (status == NULL || U_FAILURE(*status)){
    133         return NULL;
    134     }
    135     if (bi == NULL) {
    136        *status = U_ILLEGAL_ARGUMENT_ERROR;
    137         return NULL;
    138     }
    139     if (pBufferSize != NULL) {
    140         int32_t inputSize = *pBufferSize;
    141         *pBufferSize = 1;
    142         if (inputSize == 0) {
    143             return NULL;  // preflighting for deprecated functionality
    144         }
    145     }
    146     BreakIterator *newBI = ((BreakIterator *)bi)->clone();
    147     if (newBI == NULL) {
    148         *status = U_MEMORY_ALLOCATION_ERROR;
    149     } else {
    150         *status = U_SAFECLONE_ALLOCATED_WARNING;
    151     }
    152     return (UBreakIterator *)newBI;
    153 }
    154 
    155 
    156 
    157 U_CAPI void U_EXPORT2
    158 ubrk_close(UBreakIterator *bi)
    159 {
    160     delete (BreakIterator *)bi;
    161 }
    162 
    163 U_CAPI void U_EXPORT2
    164 ubrk_setText(UBreakIterator* bi,
    165              const UChar*    text,
    166              int32_t         textLength,
    167              UErrorCode*     status)
    168 {
    169     UText  ut = UTEXT_INITIALIZER;
    170     utext_openUChars(&ut, text, textLength, status);
    171     ((BreakIterator*)bi)->setText(&ut, *status);
    172     // A stack allocated UText wrapping a UChar * string
    173     //   can be dumped without explicitly closing it.
    174 }
    175 
    176 
    177 
    178 U_CAPI void U_EXPORT2
    179 ubrk_setUText(UBreakIterator *bi,
    180              UText          *text,
    181              UErrorCode     *status)
    182 {
    183   ((BreakIterator*)bi)->setText(text, *status);
    184 }
    185 
    186 
    187 
    188 
    189 
    190 U_CAPI int32_t U_EXPORT2
    191 ubrk_current(const UBreakIterator *bi)
    192 {
    193 
    194   return ((BreakIterator*)bi)->current();
    195 }
    196 
    197 U_CAPI int32_t U_EXPORT2
    198 ubrk_next(UBreakIterator *bi)
    199 {
    200 
    201   return ((BreakIterator*)bi)->next();
    202 }
    203 
    204 U_CAPI int32_t U_EXPORT2
    205 ubrk_previous(UBreakIterator *bi)
    206 {
    207 
    208   return ((BreakIterator*)bi)->previous();
    209 }
    210 
    211 U_CAPI int32_t U_EXPORT2
    212 ubrk_first(UBreakIterator *bi)
    213 {
    214 
    215   return ((BreakIterator*)bi)->first();
    216 }
    217 
    218 U_CAPI int32_t U_EXPORT2
    219 ubrk_last(UBreakIterator *bi)
    220 {
    221 
    222   return ((BreakIterator*)bi)->last();
    223 }
    224 
    225 U_CAPI int32_t U_EXPORT2
    226 ubrk_preceding(UBreakIterator *bi,
    227            int32_t offset)
    228 {
    229 
    230   return ((BreakIterator*)bi)->preceding(offset);
    231 }
    232 
    233 U_CAPI int32_t U_EXPORT2
    234 ubrk_following(UBreakIterator *bi,
    235            int32_t offset)
    236 {
    237 
    238   return ((BreakIterator*)bi)->following(offset);
    239 }
    240 
    241 U_CAPI const char* U_EXPORT2
    242 ubrk_getAvailable(int32_t index)
    243 {
    244 
    245   return uloc_getAvailable(index);
    246 }
    247 
    248 U_CAPI int32_t U_EXPORT2
    249 ubrk_countAvailable()
    250 {
    251 
    252   return uloc_countAvailable();
    253 }
    254 
    255 
    256 U_CAPI  UBool U_EXPORT2
    257 ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
    258 {
    259     return ((BreakIterator*)bi)->isBoundary(offset);
    260 }
    261 
    262 
    263 U_CAPI  int32_t U_EXPORT2
    264 ubrk_getRuleStatus(UBreakIterator *bi)
    265 {
    266     return ((BreakIterator*)bi)->getRuleStatus();
    267 }
    268 
    269 U_CAPI  int32_t U_EXPORT2
    270 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
    271 {
    272     return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
    273 }
    274 
    275 
    276 U_CAPI const char* U_EXPORT2
    277 ubrk_getLocaleByType(const UBreakIterator *bi,
    278                      ULocDataLocaleType type,
    279                      UErrorCode* status)
    280 {
    281     if (bi == NULL) {
    282         if (U_SUCCESS(*status)) {
    283             *status = U_ILLEGAL_ARGUMENT_ERROR;
    284         }
    285         return NULL;
    286     }
    287     return ((BreakIterator*)bi)->getLocaleID(type, *status);
    288 }
    289 
    290 
    291 void ubrk_refreshUText(UBreakIterator *bi,
    292                        UText          *text,
    293                        UErrorCode     *status)
    294 {
    295     BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
    296     bii->refreshInputText(text, *status);
    297 }
    298 
    299 
    300 
    301 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
    302