1 /* 2 ******************************************************************************* 3 * Copyright (C) 2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * affixpatternparser.h 7 * 8 * created on: 2015jan06 9 * created by: Travis Keep 10 */ 11 12 #ifndef __AFFIX_PATTERN_PARSER_H__ 13 #define __AFFIX_PATTERN_PARSER_H__ 14 15 #include "unicode/utypes.h" 16 17 #if !UCONFIG_NO_FORMATTING 18 19 #include "unicode/unistr.h" 20 #include "unicode/uobject.h" 21 #include "pluralaffix.h" 22 23 U_NAMESPACE_BEGIN 24 25 class PluralRules; 26 class FixedPrecision; 27 class DecimalFormatSymbols; 28 29 /** 30 * A representation of the various forms of a particular currency according 31 * to some locale and usage context. 32 * 33 * Includes the symbol, ISO code form, and long form(s) of the currency name 34 * for each plural variation. 35 */ 36 class U_I18N_API CurrencyAffixInfo : public UMemory { 37 public: 38 /** 39 * Symbol is \u00a4; ISO form is \u00a4\u00a4; 40 * long form is \u00a4\u00a4\u00a4. 41 */ 42 CurrencyAffixInfo(); 43 44 const UnicodeString &getSymbol() const { return fSymbol; } 45 const UnicodeString &getISO() const { return fISO; } 46 const PluralAffix &getLong() const { return fLong; } 47 void setSymbol(const UnicodeString &symbol) { 48 fSymbol = symbol; 49 fIsDefault = FALSE; 50 } 51 void setISO(const UnicodeString &iso) { 52 fISO = iso; 53 fIsDefault = FALSE; 54 } 55 UBool 56 equals(const CurrencyAffixInfo &other) const { 57 return (fSymbol == other.fSymbol) 58 && (fISO == other.fISO) 59 && (fLong.equals(other.fLong)) 60 && (fIsDefault == other.fIsDefault); 61 } 62 63 /** 64 * Intializes this instance. 65 * 66 * @param locale the locale for the currency forms. 67 * @param rules The plural rules for the locale. 68 * @param currency the null terminated, 3 character ISO code of the 69 * currency. If NULL, resets this instance as if it were just created. 70 * In this case, the first 2 parameters may be NULL as well. 71 * @param status any error returned here. 72 */ 73 void set( 74 const char *locale, const PluralRules *rules, 75 const UChar *currency, UErrorCode &status); 76 77 /** 78 * Returns true if this instance is the default. That is has no real 79 * currency. For instance never initialized with set() 80 * or reset with set(NULL, NULL, NULL, status). 81 */ 82 UBool isDefault() const { return fIsDefault; } 83 84 /** 85 * Adjusts the precision used for a particular currency. 86 * @param currency the null terminated, 3 character ISO code of the 87 * currency. 88 * @param usage the usage of the currency 89 * @param precision min/max fraction digits and rounding increment 90 * adjusted. 91 * @params status any error reported here. 92 */ 93 static void adjustPrecision( 94 const UChar *currency, const UCurrencyUsage usage, 95 FixedPrecision &precision, UErrorCode &status); 96 97 private: 98 /** 99 * The symbol form of the currency. 100 */ 101 UnicodeString fSymbol; 102 103 /** 104 * The ISO form of the currency, usually three letter abbreviation. 105 */ 106 UnicodeString fISO; 107 108 /** 109 * The long forms of the currency keyed by plural variation. 110 */ 111 PluralAffix fLong; 112 113 UBool fIsDefault; 114 115 }; 116 117 class AffixPatternIterator; 118 119 /** 120 * A locale agnostic representation of an affix pattern. 121 */ 122 class U_I18N_API AffixPattern : public UMemory { 123 public: 124 125 /** 126 * The token types that can appear in an affix pattern. 127 */ 128 enum ETokenType { 129 kLiteral, 130 kPercent, 131 kPerMill, 132 kCurrency, 133 kNegative, 134 kPositive 135 }; 136 137 /** 138 * An empty affix pattern. 139 */ 140 AffixPattern() 141 : tokens(), literals(), hasCurrencyToken(FALSE), 142 hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) { 143 } 144 145 /** 146 * Adds a string literal to this affix pattern. 147 */ 148 void addLiteral(const UChar *, int32_t start, int32_t len); 149 150 /** 151 * Adds a token to this affix pattern. t must not be kLiteral as 152 * the addLiteral() method adds literals. 153 * @param t the token type to add 154 */ 155 void add(ETokenType t); 156 157 /** 158 * Adds a currency token with specific count to this affix pattern. 159 * @param count the token count. Used to distinguish between 160 * one, two, or three currency symbols. Note that adding a currency 161 * token with count=2 (Use ISO code) is different than adding two 162 * currency tokens each with count=1 (two currency symbols). 163 */ 164 void addCurrency(uint8_t count); 165 166 /** 167 * Makes this instance be an empty affix pattern. 168 */ 169 void remove(); 170 171 /** 172 * Provides an iterator over the tokens in this instance. 173 * @param result this is initialized to point just before the 174 * first token of this instance. Caller must call nextToken() 175 * on the iterator once it is set up to have it actually point 176 * to the first token. This first call to nextToken() will return 177 * FALSE if the AffixPattern being iterated over is empty. 178 * @return result 179 */ 180 AffixPatternIterator &iterator(AffixPatternIterator &result) const; 181 182 /** 183 * Returns TRUE if this instance has currency tokens in it. 184 */ 185 UBool usesCurrency() const { 186 return hasCurrencyToken; 187 } 188 189 UBool usesPercent() const { 190 return hasPercentToken; 191 } 192 193 UBool usesPermill() const { 194 return hasPermillToken; 195 } 196 197 /** 198 * Returns the number of code points a string of this instance 199 * would have if none of the special tokens were escaped. 200 * Used to compute the padding size. 201 */ 202 int32_t countChar32() const { 203 return char32Count; 204 } 205 206 /** 207 * Appends other to this instance mutating this instance in place. 208 * @param other The pattern appended to the end of this one. 209 * @return a reference to this instance for chaining. 210 */ 211 AffixPattern &append(const AffixPattern &other); 212 213 /** 214 * Converts this AffixPattern back into a user string. 215 * It is the inverse of parseUserAffixString. 216 */ 217 UnicodeString &toUserString(UnicodeString &appendTo) const; 218 219 /** 220 * Converts this AffixPattern back into a string. 221 * It is the inverse of parseAffixString. 222 */ 223 UnicodeString &toString(UnicodeString &appendTo) const; 224 225 /** 226 * Parses an affix pattern string appending it to an AffixPattern. 227 * Parses affix pattern strings produced from using 228 * DecimalFormatPatternParser to parse a format pattern. Affix patterns 229 * include the positive prefix and suffix and the negative prefix 230 * and suffix. This method expects affix patterns strings to be in the 231 * same format that DecimalFormatPatternParser produces. Namely special 232 * characters in the affix that correspond to a field type must be 233 * prefixed with an apostrophe ('). These special character sequences 234 * inluce minus (-), percent (%), permile (U+2030), plus (+), 235 * short currency (U+00a4), medium currency (u+00a4 * 2), 236 * long currency (u+a4 * 3), and apostrophe (') 237 * (apostrophe does not correspond to a field type but has to be escaped 238 * because it itself is the escape character). 239 * Since the expansion of these special character 240 * sequences is locale dependent, these sequences are not expanded in 241 * an AffixPattern instance. 242 * If these special characters are not prefixed with an apostrophe in 243 * the affix pattern string, then they are treated verbatim just as 244 * any other character. If an apostrophe prefixes a non special 245 * character in the affix pattern, the apostrophe is simply ignored. 246 * 247 * @param affixStr the string from DecimalFormatPatternParser 248 * @param appendTo parsed result appended here. 249 * @param status any error parsing returned here. 250 */ 251 static AffixPattern &parseAffixString( 252 const UnicodeString &affixStr, 253 AffixPattern &appendTo, 254 UErrorCode &status); 255 256 /** 257 * Parses an affix pattern string appending it to an AffixPattern. 258 * Parses affix pattern strings as the user would supply them. 259 * In this function, quoting makes special characters like normal 260 * characters whereas in parseAffixString, quoting makes special 261 * characters special. 262 * 263 * @param affixStr the string from the user 264 * @param appendTo parsed result appended here. 265 * @param status any error parsing returned here. 266 */ 267 static AffixPattern &parseUserAffixString( 268 const UnicodeString &affixStr, 269 AffixPattern &appendTo, 270 UErrorCode &status); 271 272 UBool equals(const AffixPattern &other) const { 273 return (tokens == other.tokens) 274 && (literals == other.literals) 275 && (hasCurrencyToken == other.hasCurrencyToken) 276 && (hasPercentToken == other.hasPercentToken) 277 && (hasPermillToken == other.hasPermillToken) 278 && (char32Count == other.char32Count); 279 } 280 281 private: 282 /* 283 * Tokens stored here. Each UChar generally stands for one token. A 284 * Each token is of form 'etttttttllllllll' llllllll is the length of 285 * the token and ranges from 0-255. ttttttt is the token type and ranges 286 * from 0-127. If e is set it means this is an extendo token (to be 287 * described later). To accomodate token lengths above 255, each normal 288 * token (e=0) can be followed by 0 or more extendo tokens (e=1) with 289 * the same type. Right now only kLiteral Tokens have extendo tokens. 290 * Each extendo token provides the next 8 higher bits for the length. 291 * If a kLiteral token is followed by 2 extendo tokens then, then the 292 * llllllll of the next extendo token contains bits 8-15 of the length 293 * and the last extendo token contains bits 16-23 of the length. 294 */ 295 UnicodeString tokens; 296 297 /* 298 * The characters of the kLiteral tokens are concatenated together here. 299 * The first characters go with the first kLiteral token, the next 300 * characters go with the next kLiteral token etc. 301 */ 302 UnicodeString literals; 303 UBool hasCurrencyToken; 304 UBool hasPercentToken; 305 UBool hasPermillToken; 306 int32_t char32Count; 307 void add(ETokenType t, uint8_t count); 308 309 }; 310 311 /** 312 * An iterator over the tokens in an AffixPattern instance. 313 */ 314 class U_I18N_API AffixPatternIterator : public UMemory { 315 public: 316 317 /** 318 * Using an iterator without first calling iterator on an AffixPattern 319 * instance to initialize the iterator results in 320 * undefined behavior. 321 */ 322 AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } 323 /** 324 * Advances this iterator to the next token. Returns FALSE when there 325 * are no more tokens. Calling the other methods after nextToken() 326 * returns FALSE results in undefined behavior. 327 */ 328 UBool nextToken(); 329 330 /** 331 * Returns the type of token. 332 */ 333 AffixPattern::ETokenType getTokenType() const; 334 335 /** 336 * For literal tokens, returns the literal string. Calling this for 337 * other token types results in undefined behavior. 338 * @param result replaced with a read-only alias to the literal string. 339 * @return result 340 */ 341 UnicodeString &getLiteral(UnicodeString &result) const; 342 343 /** 344 * Returns the token length. Usually 1, but for currency tokens may 345 * be 2 for ISO code and 3 for long form. 346 */ 347 int32_t getTokenLength() const; 348 private: 349 int32_t nextLiteralIndex; 350 int32_t lastLiteralLength; 351 int32_t nextTokenIndex; 352 const UnicodeString *tokens; 353 const UnicodeString *literals; 354 friend class AffixPattern; 355 AffixPatternIterator(const AffixPatternIterator &); 356 AffixPatternIterator &operator=(const AffixPatternIterator &); 357 }; 358 359 /** 360 * A locale aware class that converts locale independent AffixPattern 361 * instances into locale dependent PluralAffix instances. 362 */ 363 class U_I18N_API AffixPatternParser : public UMemory { 364 public: 365 AffixPatternParser(); 366 AffixPatternParser(const DecimalFormatSymbols &symbols); 367 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); 368 369 /** 370 * Parses affixPattern appending the result to appendTo. 371 * @param affixPattern The affix pattern. 372 * @param currencyAffixInfo contains the currency forms. 373 * @param appendTo The result of parsing affixPattern is appended here. 374 * @param status any error returned here. 375 * @return appendTo. 376 */ 377 PluralAffix &parse( 378 const AffixPattern &affixPattern, 379 const CurrencyAffixInfo ¤cyAffixInfo, 380 PluralAffix &appendTo, 381 UErrorCode &status) const; 382 383 UBool equals(const AffixPatternParser &other) const { 384 return (fPercent == other.fPercent) 385 && (fPermill == other.fPermill) 386 && (fNegative == other.fNegative) 387 && (fPositive == other.fPositive); 388 } 389 390 private: 391 UnicodeString fPercent; 392 UnicodeString fPermill; 393 UnicodeString fNegative; 394 UnicodeString fPositive; 395 }; 396 397 398 U_NAMESPACE_END 399 #endif /* #if !UCONFIG_NO_FORMATTING */ 400 #endif // __AFFIX_PATTERN_PARSER_H__ 401