1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 * Copyright (C) 2010 , Yahoo! Inc. 6 ******************************************************************** 7 * 8 * File SELFMT.CPP 9 * 10 * Modification History: 11 * 12 * Date Name Description 13 * 11/11/09 kirtig Finished first cut of implementation. 14 * 11/16/09 kirtig Improved version 15 ********************************************************************/ 16 17 #include "unicode/utypeinfo.h" // for 'typeid' to work 18 19 #include "unicode/utypes.h" 20 #include "unicode/ustring.h" 21 #include "unicode/ucnv_err.h" 22 #include "unicode/uchar.h" 23 #include "unicode/umsg.h" 24 #include "unicode/rbnf.h" 25 #include "cmemory.h" 26 #include "util.h" 27 #include "uassert.h" 28 #include "ustrfmt.h" 29 #include "uvector.h" 30 31 #include "unicode/selfmt.h" 32 #include "selfmtimpl.h" 33 34 #if !UCONFIG_NO_FORMATTING 35 36 U_NAMESPACE_BEGIN 37 38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat) 39 40 #define MAX_KEYWORD_SIZE 30 41 static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0}; 42 43 SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) { 44 if (U_FAILURE(status)) { 45 return; 46 } 47 initHashTable(status); 48 applyPattern(pat, status); 49 } 50 51 SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) { 52 UErrorCode status = U_ZERO_ERROR; 53 pattern = other.pattern; 54 copyHashtable(other.parsedValuesHash, status); 55 } 56 57 SelectFormat::~SelectFormat() { 58 cleanHashTable(); 59 } 60 61 void SelectFormat::initHashTable(UErrorCode &status) { 62 if (U_FAILURE(status)) { 63 return; 64 } 65 // has inited 66 if (parsedValuesHash != NULL) { 67 return; 68 } 69 70 parsedValuesHash = new Hashtable(TRUE, status); 71 if (U_FAILURE(status)) { 72 cleanHashTable(); 73 return; 74 } else { 75 if (parsedValuesHash == NULL) { 76 status = U_MEMORY_ALLOCATION_ERROR; 77 return; 78 } 79 } 80 // to use hashtable->equals(), must set Value Compartor. 81 parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString); 82 } 83 84 void SelectFormat::cleanHashTable() { 85 if (parsedValuesHash != NULL) { 86 delete parsedValuesHash; 87 parsedValuesHash = NULL; 88 } 89 } 90 91 void 92 SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 93 if (U_FAILURE(status)) { 94 return; 95 } 96 97 pattern = newPattern; 98 enum State{ startState, keywordState, pastKeywordState, phraseState}; 99 100 //Initialization 101 UnicodeString keyword ; 102 UnicodeString phrase ; 103 UnicodeString* ptrPhrase ; 104 int32_t braceCount = 0; 105 106 if (parsedValuesHash == NULL) { 107 initHashTable(status); 108 if (U_FAILURE(status)) { 109 return; 110 } 111 } 112 parsedValuesHash->removeAll(); 113 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString); 114 115 //Process the state machine 116 State state = startState; 117 for (int32_t i = 0; i < pattern.length(); ++i) { 118 //Get the character and check its type 119 UChar ch = pattern.charAt(i); 120 CharacterClass type = classifyCharacter(ch); 121 122 //Allow any character in phrase but nowhere else 123 if ( type == tOther ) { 124 if ( state == phraseState ){ 125 phrase += ch; 126 continue; 127 }else { 128 status = U_PATTERN_SYNTAX_ERROR; 129 cleanHashTable(); 130 return; 131 } 132 } 133 134 //Process the state machine 135 switch (state) { 136 //At the start of pattern 137 case startState: 138 switch (type) { 139 case tSpace: 140 break; 141 case tStartKeyword: 142 state = keywordState; 143 keyword += ch; 144 break; 145 //If anything else is encountered, it's a syntax error 146 default: 147 status = U_PATTERN_SYNTAX_ERROR; 148 cleanHashTable(); 149 return; 150 }//end of switch(type) 151 break; 152 153 //Handle the keyword state 154 case keywordState: 155 switch (type) { 156 case tSpace: 157 state = pastKeywordState; 158 break; 159 case tStartKeyword: 160 case tContinueKeyword: 161 keyword += ch; 162 break; 163 case tLeftBrace: 164 state = phraseState; 165 break; 166 //If anything else is encountered, it's a syntax error 167 default: 168 status = U_PATTERN_SYNTAX_ERROR; 169 cleanHashTable(); 170 return; 171 }//end of switch(type) 172 break; 173 174 //Handle the pastkeyword state 175 case pastKeywordState: 176 switch (type) { 177 case tSpace: 178 break; 179 case tLeftBrace: 180 state = phraseState; 181 break; 182 //If anything else is encountered, it's a syntax error 183 default: 184 status = U_PATTERN_SYNTAX_ERROR; 185 cleanHashTable(); 186 return; 187 }//end of switch(type) 188 break; 189 190 //Handle the phrase state 191 case phraseState: 192 switch (type) { 193 case tLeftBrace: 194 braceCount++; 195 phrase += ch; 196 break; 197 case tRightBrace: 198 //Matching keyword, phrase pair found 199 if (braceCount == 0){ 200 //Check validity of keyword 201 if (parsedValuesHash->get(keyword) != NULL) { 202 status = U_DUPLICATE_KEYWORD; 203 cleanHashTable(); 204 return; 205 } 206 if (keyword.length() == 0) { 207 status = U_PATTERN_SYNTAX_ERROR; 208 cleanHashTable(); 209 return; 210 } 211 212 //Store the keyword, phrase pair in hashTable 213 ptrPhrase = new UnicodeString(phrase); 214 parsedValuesHash->put( keyword, ptrPhrase, status); 215 216 //Reinitialize 217 keyword.remove(); 218 phrase.remove(); 219 ptrPhrase = NULL; 220 state = startState; 221 } 222 223 if (braceCount > 0){ 224 braceCount-- ; 225 phrase += ch; 226 } 227 break; 228 default: 229 phrase += ch; 230 }//end of switch(type) 231 break; 232 233 //Handle the default case of switch(state) 234 default: 235 status = U_PATTERN_SYNTAX_ERROR; 236 cleanHashTable(); 237 return; 238 239 }//end of switch(state) 240 } 241 242 //Check if the state machine is back to startState 243 if ( state != startState){ 244 status = U_PATTERN_SYNTAX_ERROR; 245 cleanHashTable(); 246 return; 247 } 248 249 //Check if "other" keyword is present 250 if ( !checkSufficientDefinition() ) { 251 status = U_DEFAULT_KEYWORD_MISSING; 252 cleanHashTable(); 253 } 254 return; 255 } 256 257 UnicodeString& 258 SelectFormat::format(const Formattable& obj, 259 UnicodeString& appendTo, 260 FieldPosition& pos, 261 UErrorCode& status) const 262 { 263 switch (obj.getType()) 264 { 265 case Formattable::kString: 266 return format(obj.getString(), appendTo, pos, status); 267 default: 268 if( U_SUCCESS(status) ){ 269 status = U_ILLEGAL_ARGUMENT_ERROR; 270 } 271 return appendTo; 272 } 273 } 274 275 UnicodeString& 276 SelectFormat::format(const UnicodeString& keyword, 277 UnicodeString& appendTo, 278 FieldPosition& /*pos */, 279 UErrorCode& status) const { 280 281 if (U_FAILURE(status)) return appendTo; 282 283 if (parsedValuesHash == NULL) { 284 status = U_INVALID_FORMAT_ERROR; 285 return appendTo; 286 } 287 288 //Check for the validity of the keyword 289 if ( !checkValidKeyword(keyword) ){ 290 status = U_ILLEGAL_ARGUMENT_ERROR; 291 return appendTo; 292 } 293 294 UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword); 295 if (selectedPattern == NULL) { 296 selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER); 297 } 298 299 return appendTo += *selectedPattern; 300 } 301 302 UnicodeString& 303 SelectFormat::toPattern(UnicodeString& appendTo) { 304 return appendTo += pattern; 305 } 306 307 SelectFormat::CharacterClass 308 SelectFormat::classifyCharacter(UChar ch) const{ 309 if ((ch >= CAP_A) && (ch <= CAP_Z)) { 310 return tStartKeyword; 311 } 312 if ((ch >= LOW_A) && (ch <= LOW_Z)) { 313 return tStartKeyword; 314 } 315 if ((ch >= U_ZERO) && (ch <= U_NINE)) { 316 return tContinueKeyword; 317 } 318 if ( uprv_isRuleWhiteSpace(ch) ){ 319 return tSpace; 320 } 321 switch (ch) { 322 case LEFTBRACE: 323 return tLeftBrace; 324 case RIGHTBRACE: 325 return tRightBrace; 326 case HYPHEN: 327 case LOWLINE: 328 return tContinueKeyword; 329 default : 330 return tOther; 331 } 332 } 333 334 UBool 335 SelectFormat::checkSufficientDefinition() { 336 // Check that at least the default rule is defined. 337 return (parsedValuesHash != NULL && 338 parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ; 339 } 340 341 UBool 342 SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{ 343 int32_t len = argKeyword.length(); 344 if (len < 1){ 345 return FALSE; 346 } 347 CharacterClass type = classifyCharacter(argKeyword.charAt(0)); 348 if( type != tStartKeyword ){ 349 return FALSE; 350 } 351 352 for (int32_t i = 0; i < argKeyword.length(); ++i) { 353 type = classifyCharacter(argKeyword.charAt(i)); 354 if( type != tStartKeyword && type != tContinueKeyword ){ 355 return FALSE; 356 } 357 } 358 return TRUE; 359 } 360 361 Format* SelectFormat::clone() const 362 { 363 return new SelectFormat(*this); 364 } 365 366 SelectFormat& 367 SelectFormat::operator=(const SelectFormat& other) { 368 if (this != &other) { 369 UErrorCode status = U_ZERO_ERROR; 370 pattern = other.pattern; 371 copyHashtable(other.parsedValuesHash, status); 372 } 373 return *this; 374 } 375 376 UBool 377 SelectFormat::operator==(const Format& other) const { 378 if( this == &other){ 379 return TRUE; 380 } 381 if (typeid(*this) != typeid(other)) { 382 return FALSE; 383 } 384 SelectFormat* fmt = (SelectFormat*)&other; 385 Hashtable* hashOther = fmt->parsedValuesHash; 386 if ( parsedValuesHash == NULL && hashOther == NULL) 387 return TRUE; 388 if ( parsedValuesHash == NULL || hashOther == NULL) 389 return FALSE; 390 return parsedValuesHash->equals(*hashOther); 391 } 392 393 UBool 394 SelectFormat::operator!=(const Format& other) const { 395 return !operator==(other); 396 } 397 398 void 399 SelectFormat::parseObject(const UnicodeString& /*source*/, 400 Formattable& /*result*/, 401 ParsePosition& pos) const 402 { 403 // TODO: not yet supported in icu4j and icu4c 404 pos.setErrorIndex(pos.getIndex()); 405 } 406 407 void 408 SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) { 409 if (U_FAILURE(status)) { 410 return; 411 } 412 if (other == NULL) { 413 cleanHashTable(); 414 return; 415 } 416 if (parsedValuesHash == NULL) { 417 initHashTable(status); 418 if (U_FAILURE(status)) { 419 return; 420 } 421 } 422 423 parsedValuesHash->removeAll(); 424 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString); 425 426 int32_t pos = -1; 427 const UHashElement* elem = NULL; 428 429 // walk through the hash table and create a deep clone 430 while ((elem = other->nextElement(pos)) != NULL){ 431 const UHashTok otherKeyTok = elem->key; 432 UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; 433 const UHashTok otherKeyToVal = elem->value; 434 UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; 435 parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status); 436 if (U_FAILURE(status)){ 437 cleanHashTable(); 438 return; 439 } 440 } 441 } 442 443 U_NAMESPACE_END 444 445 #endif /* #if !UCONFIG_NO_FORMATTING */ 446 447 //eof 448