Home | History | Annotate | Download | only in i18n
      1 /********************************************************************
      2  * COPYRIGHT:
      3  * Copyright (c) 1997-2010, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  * Copyright (C) 2010 , Yahoo! Inc.
      6  ********************************************************************
      7  *
      8  * File SELFMT.CPP
      9  *
     10  * Modification History:
     11  *
     12  *   Date        Name        Description
     13  *   11/11/09    kirtig      Finished first cut of implementation.
     14  *   11/16/09    kirtig      Improved version
     15  ********************************************************************/
     16 
     17 #include "unicode/utypeinfo.h"  // for 'typeid' to work
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/ustring.h"
     21 #include "unicode/ucnv_err.h"
     22 #include "unicode/uchar.h"
     23 #include "unicode/umsg.h"
     24 #include "unicode/rbnf.h"
     25 #include "cmemory.h"
     26 #include "util.h"
     27 #include "uassert.h"
     28 #include "ustrfmt.h"
     29 #include "uvector.h"
     30 
     31 #include "unicode/selfmt.h"
     32 #include "selfmtimpl.h"
     33 
     34 #if !UCONFIG_NO_FORMATTING
     35 
     36 U_NAMESPACE_BEGIN
     37 
     38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
     39 
     40 #define MAX_KEYWORD_SIZE 30
     41 static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
     42 
     43 SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) {
     44    if (U_FAILURE(status)) {
     45       return;
     46    }
     47    initHashTable(status);
     48    applyPattern(pat, status);
     49 }
     50 
     51 SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) {
     52    UErrorCode status = U_ZERO_ERROR;
     53    pattern = other.pattern;
     54    copyHashtable(other.parsedValuesHash, status);
     55 }
     56 
     57 SelectFormat::~SelectFormat() {
     58   cleanHashTable();
     59 }
     60 
     61 void SelectFormat::initHashTable(UErrorCode &status) {
     62   if (U_FAILURE(status)) {
     63     return;
     64   }
     65   // has inited
     66   if (parsedValuesHash != NULL) {
     67     return;
     68   }
     69 
     70   parsedValuesHash = new Hashtable(TRUE, status);
     71   if (U_FAILURE(status)) {
     72     cleanHashTable();
     73     return;
     74   } else {
     75     if (parsedValuesHash == NULL) {
     76       status = U_MEMORY_ALLOCATION_ERROR;
     77       return;
     78     }
     79   }
     80   // to use hashtable->equals(), must set Value Compartor.
     81   parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString);
     82 }
     83 
     84 void SelectFormat::cleanHashTable() {
     85   if (parsedValuesHash != NULL) {
     86     delete parsedValuesHash;
     87     parsedValuesHash = NULL;
     88   }
     89 }
     90 
     91 void
     92 SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
     93     if (U_FAILURE(status)) {
     94       return;
     95     }
     96 
     97     pattern = newPattern;
     98     enum State{ startState, keywordState, pastKeywordState, phraseState};
     99 
    100     //Initialization
    101     UnicodeString keyword ;
    102     UnicodeString phrase ;
    103     UnicodeString* ptrPhrase ;
    104     int32_t braceCount = 0;
    105 
    106     if (parsedValuesHash == NULL) {
    107       initHashTable(status);
    108       if (U_FAILURE(status)) {
    109         return;
    110       }
    111     }
    112     parsedValuesHash->removeAll();
    113     parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
    114 
    115     //Process the state machine
    116     State state = startState;
    117     for (int32_t i = 0; i < pattern.length(); ++i) {
    118         //Get the character and check its type
    119         UChar ch = pattern.charAt(i);
    120         CharacterClass type = classifyCharacter(ch);
    121 
    122         //Allow any character in phrase but nowhere else
    123         if ( type == tOther ) {
    124             if ( state == phraseState ){
    125                 phrase += ch;
    126                 continue;
    127             }else {
    128                 status = U_PATTERN_SYNTAX_ERROR;
    129                 cleanHashTable();
    130                 return;
    131             }
    132         }
    133 
    134         //Process the state machine
    135         switch (state) {
    136             //At the start of pattern
    137             case startState:
    138                 switch (type) {
    139                     case tSpace:
    140                         break;
    141                     case tStartKeyword:
    142                         state = keywordState;
    143                         keyword += ch;
    144                         break;
    145                     //If anything else is encountered, it's a syntax error
    146                     default:
    147                         status = U_PATTERN_SYNTAX_ERROR;
    148                         cleanHashTable();
    149                         return;
    150                 }//end of switch(type)
    151                 break;
    152 
    153             //Handle the keyword state
    154             case keywordState:
    155                 switch (type) {
    156                     case tSpace:
    157                         state = pastKeywordState;
    158                         break;
    159                     case tStartKeyword:
    160                     case tContinueKeyword:
    161                         keyword += ch;
    162                         break;
    163                     case tLeftBrace:
    164                         state = phraseState;
    165                         break;
    166                     //If anything else is encountered, it's a syntax error
    167                     default:
    168                         status = U_PATTERN_SYNTAX_ERROR;
    169                         cleanHashTable();
    170                         return;
    171                 }//end of switch(type)
    172                 break;
    173 
    174             //Handle the pastkeyword state
    175             case pastKeywordState:
    176                 switch (type) {
    177                     case tSpace:
    178                         break;
    179                     case tLeftBrace:
    180                         state = phraseState;
    181                         break;
    182                     //If anything else is encountered, it's a syntax error
    183                     default:
    184                         status = U_PATTERN_SYNTAX_ERROR;
    185                         cleanHashTable();
    186                         return;
    187                 }//end of switch(type)
    188                 break;
    189 
    190             //Handle the phrase state
    191             case phraseState:
    192                 switch (type) {
    193                     case tLeftBrace:
    194                         braceCount++;
    195                         phrase += ch;
    196                         break;
    197                     case tRightBrace:
    198                         //Matching keyword, phrase pair found
    199                         if (braceCount == 0){
    200                             //Check validity of keyword
    201                             if (parsedValuesHash->get(keyword) != NULL) {
    202                                 status = U_DUPLICATE_KEYWORD;
    203                                 cleanHashTable();
    204                                 return;
    205                             }
    206                             if (keyword.length() == 0) {
    207                                 status = U_PATTERN_SYNTAX_ERROR;
    208                                 cleanHashTable();
    209                                 return;
    210                             }
    211 
    212                             //Store the keyword, phrase pair in hashTable
    213                             ptrPhrase = new UnicodeString(phrase);
    214                             parsedValuesHash->put( keyword, ptrPhrase, status);
    215 
    216                             //Reinitialize
    217                             keyword.remove();
    218                             phrase.remove();
    219                             ptrPhrase = NULL;
    220                             state = startState;
    221                         }
    222 
    223                         if (braceCount > 0){
    224                             braceCount-- ;
    225                             phrase += ch;
    226                         }
    227                         break;
    228                     default:
    229                         phrase += ch;
    230                 }//end of switch(type)
    231                 break;
    232 
    233             //Handle the  default case of switch(state)
    234             default:
    235                 status = U_PATTERN_SYNTAX_ERROR;
    236                 cleanHashTable();
    237                 return;
    238 
    239         }//end of switch(state)
    240     }
    241 
    242     //Check if the state machine is back to startState
    243     if ( state != startState){
    244         status = U_PATTERN_SYNTAX_ERROR;
    245         cleanHashTable();
    246         return;
    247     }
    248 
    249     //Check if "other" keyword is present
    250     if ( !checkSufficientDefinition() ) {
    251         status = U_DEFAULT_KEYWORD_MISSING;
    252         cleanHashTable();
    253     }
    254     return;
    255 }
    256 
    257 UnicodeString&
    258 SelectFormat::format(const Formattable& obj,
    259                    UnicodeString& appendTo,
    260                    FieldPosition& pos,
    261                    UErrorCode& status) const
    262 {
    263     switch (obj.getType())
    264     {
    265     case Formattable::kString:
    266         return format(obj.getString(), appendTo, pos, status);
    267     default:
    268         if( U_SUCCESS(status) ){
    269             status = U_ILLEGAL_ARGUMENT_ERROR;
    270         }
    271         return appendTo;
    272     }
    273 }
    274 
    275 UnicodeString&
    276 SelectFormat::format(const UnicodeString& keyword,
    277                      UnicodeString& appendTo,
    278                      FieldPosition& /*pos */,
    279                      UErrorCode& status) const {
    280 
    281     if (U_FAILURE(status)) return appendTo;
    282 
    283     if (parsedValuesHash == NULL) {
    284         status = U_INVALID_FORMAT_ERROR;
    285         return appendTo;
    286     }
    287 
    288     //Check for the validity of the keyword
    289     if ( !checkValidKeyword(keyword) ){
    290         status = U_ILLEGAL_ARGUMENT_ERROR;
    291         return appendTo;
    292     }
    293 
    294     UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
    295     if (selectedPattern == NULL) {
    296         selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
    297     }
    298 
    299     return appendTo += *selectedPattern;
    300 }
    301 
    302 UnicodeString&
    303 SelectFormat::toPattern(UnicodeString& appendTo) {
    304     return appendTo += pattern;
    305 }
    306 
    307 SelectFormat::CharacterClass
    308 SelectFormat::classifyCharacter(UChar ch) const{
    309     if ((ch >= CAP_A) && (ch <= CAP_Z)) {
    310         return tStartKeyword;
    311     }
    312     if ((ch >= LOW_A) && (ch <= LOW_Z)) {
    313         return tStartKeyword;
    314     }
    315     if ((ch >= U_ZERO) && (ch <= U_NINE)) {
    316         return tContinueKeyword;
    317     }
    318     if ( uprv_isRuleWhiteSpace(ch) ){
    319         return tSpace;
    320     }
    321     switch (ch) {
    322         case LEFTBRACE:
    323             return tLeftBrace;
    324         case RIGHTBRACE:
    325             return tRightBrace;
    326         case HYPHEN:
    327         case LOWLINE:
    328             return tContinueKeyword;
    329         default :
    330             return tOther;
    331     }
    332 }
    333 
    334 UBool
    335 SelectFormat::checkSufficientDefinition() {
    336     // Check that at least the default rule is defined.
    337     return (parsedValuesHash != NULL &&
    338            parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ;
    339 }
    340 
    341 UBool
    342 SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
    343     int32_t len = argKeyword.length();
    344     if (len < 1){
    345         return FALSE;
    346     }
    347     CharacterClass type = classifyCharacter(argKeyword.charAt(0));
    348     if( type != tStartKeyword ){
    349         return FALSE;
    350     }
    351 
    352     for (int32_t i = 0; i < argKeyword.length(); ++i) {
    353         type = classifyCharacter(argKeyword.charAt(i));
    354         if( type != tStartKeyword && type != tContinueKeyword ){
    355             return FALSE;
    356         }
    357     }
    358     return TRUE;
    359 }
    360 
    361 Format* SelectFormat::clone() const
    362 {
    363     return new SelectFormat(*this);
    364 }
    365 
    366 SelectFormat&
    367 SelectFormat::operator=(const SelectFormat& other) {
    368     if (this != &other) {
    369         UErrorCode status = U_ZERO_ERROR;
    370         pattern = other.pattern;
    371         copyHashtable(other.parsedValuesHash, status);
    372     }
    373     return *this;
    374 }
    375 
    376 UBool
    377 SelectFormat::operator==(const Format& other) const {
    378     if( this == &other){
    379         return TRUE;
    380     }
    381     if (typeid(*this) != typeid(other)) {
    382         return  FALSE;
    383     }
    384     SelectFormat* fmt = (SelectFormat*)&other;
    385     Hashtable* hashOther = fmt->parsedValuesHash;
    386     if ( parsedValuesHash == NULL && hashOther == NULL)
    387         return TRUE;
    388     if ( parsedValuesHash == NULL || hashOther == NULL)
    389         return FALSE;
    390     return parsedValuesHash->equals(*hashOther);
    391 }
    392 
    393 UBool
    394 SelectFormat::operator!=(const Format& other) const {
    395     return  !operator==(other);
    396 }
    397 
    398 void
    399 SelectFormat::parseObject(const UnicodeString& /*source*/,
    400                         Formattable& /*result*/,
    401                         ParsePosition& pos) const
    402 {
    403     // TODO: not yet supported in icu4j and icu4c
    404     pos.setErrorIndex(pos.getIndex());
    405 }
    406 
    407 void
    408 SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
    409     if (U_FAILURE(status)) {
    410       return;
    411     }
    412     if (other == NULL) {
    413       cleanHashTable();
    414       return;
    415     }
    416     if (parsedValuesHash == NULL) {
    417       initHashTable(status);
    418       if (U_FAILURE(status)) {
    419         return;
    420       }
    421     }
    422 
    423     parsedValuesHash->removeAll();
    424     parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
    425 
    426     int32_t pos = -1;
    427     const UHashElement* elem = NULL;
    428 
    429     // walk through the hash table and create a deep clone
    430     while ((elem = other->nextElement(pos)) != NULL){
    431         const UHashTok otherKeyTok = elem->key;
    432         UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
    433         const UHashTok otherKeyToVal = elem->value;
    434         UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
    435         parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
    436         if (U_FAILURE(status)){
    437             cleanHashTable();
    438             return;
    439         }
    440     }
    441 }
    442 
    443 U_NAMESPACE_END
    444 
    445 #endif /* #if !UCONFIG_NO_FORMATTING */
    446 
    447 //eof
    448