1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * affixpatternparser.h 9 * 10 * created on: 2015jan06 11 * created by: Travis Keep 12 */ 13 14 #ifndef __AFFIX_PATTERN_PARSER_H__ 15 #define __AFFIX_PATTERN_PARSER_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_FORMATTING 20 21 #include "unicode/unistr.h" 22 #include "unicode/uobject.h" 23 #include "pluralaffix.h" 24 25 U_NAMESPACE_BEGIN 26 27 class PluralRules; 28 class FixedPrecision; 29 class DecimalFormatSymbols; 30 31 /** 32 * A representation of the various forms of a particular currency according 33 * to some locale and usage context. 34 * 35 * Includes the symbol, ISO code form, and long form(s) of the currency name 36 * for each plural variation. 37 */ 38 class U_I18N_API CurrencyAffixInfo : public UMemory { 39 public: 40 /** 41 * Symbol is \u00a4; ISO form is \u00a4\u00a4; 42 * long form is \u00a4\u00a4\u00a4. 43 */ 44 CurrencyAffixInfo(); 45 46 const UnicodeString &getSymbol() const { return fSymbol; } 47 const UnicodeString &getISO() const { return fISO; } 48 const PluralAffix &getLong() const { return fLong; } 49 void setSymbol(const UnicodeString &symbol) { 50 fSymbol = symbol; 51 fIsDefault = FALSE; 52 } 53 void setISO(const UnicodeString &iso) { 54 fISO = iso; 55 fIsDefault = FALSE; 56 } 57 UBool 58 equals(const CurrencyAffixInfo &other) const { 59 return (fSymbol == other.fSymbol) 60 && (fISO == other.fISO) 61 && (fLong.equals(other.fLong)) 62 && (fIsDefault == other.fIsDefault); 63 } 64 65 /** 66 * Intializes this instance. 67 * 68 * @param locale the locale for the currency forms. 69 * @param rules The plural rules for the locale. 70 * @param currency the null terminated, 3 character ISO code of the 71 * currency. If NULL, resets this instance as if it were just created. 72 * In this case, the first 2 parameters may be NULL as well. 73 * @param status any error returned here. 74 */ 75 void set( 76 const char *locale, const PluralRules *rules, 77 const UChar *currency, UErrorCode &status); 78 79 /** 80 * Returns true if this instance is the default. That is has no real 81 * currency. For instance never initialized with set() 82 * or reset with set(NULL, NULL, NULL, status). 83 */ 84 UBool isDefault() const { return fIsDefault; } 85 86 /** 87 * Adjusts the precision used for a particular currency. 88 * @param currency the null terminated, 3 character ISO code of the 89 * currency. 90 * @param usage the usage of the currency 91 * @param precision min/max fraction digits and rounding increment 92 * adjusted. 93 * @params status any error reported here. 94 */ 95 static void adjustPrecision( 96 const UChar *currency, const UCurrencyUsage usage, 97 FixedPrecision &precision, UErrorCode &status); 98 99 private: 100 /** 101 * The symbol form of the currency. 102 */ 103 UnicodeString fSymbol; 104 105 /** 106 * The ISO form of the currency, usually three letter abbreviation. 107 */ 108 UnicodeString fISO; 109 110 /** 111 * The long forms of the currency keyed by plural variation. 112 */ 113 PluralAffix fLong; 114 115 UBool fIsDefault; 116 117 }; 118 119 class AffixPatternIterator; 120 121 /** 122 * A locale agnostic representation of an affix pattern. 123 */ 124 class U_I18N_API AffixPattern : public UMemory { 125 public: 126 127 /** 128 * The token types that can appear in an affix pattern. 129 */ 130 enum ETokenType { 131 kLiteral, 132 kPercent, 133 kPerMill, 134 kCurrency, 135 kNegative, 136 kPositive 137 }; 138 139 /** 140 * An empty affix pattern. 141 */ 142 AffixPattern() 143 : tokens(), literals(), hasCurrencyToken(FALSE), 144 hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) { 145 } 146 147 /** 148 * Adds a string literal to this affix pattern. 149 */ 150 void addLiteral(const UChar *, int32_t start, int32_t len); 151 152 /** 153 * Adds a token to this affix pattern. t must not be kLiteral as 154 * the addLiteral() method adds literals. 155 * @param t the token type to add 156 */ 157 void add(ETokenType t); 158 159 /** 160 * Adds a currency token with specific count to this affix pattern. 161 * @param count the token count. Used to distinguish between 162 * one, two, or three currency symbols. Note that adding a currency 163 * token with count=2 (Use ISO code) is different than adding two 164 * currency tokens each with count=1 (two currency symbols). 165 */ 166 void addCurrency(uint8_t count); 167 168 /** 169 * Makes this instance be an empty affix pattern. 170 */ 171 void remove(); 172 173 /** 174 * Provides an iterator over the tokens in this instance. 175 * @param result this is initialized to point just before the 176 * first token of this instance. Caller must call nextToken() 177 * on the iterator once it is set up to have it actually point 178 * to the first token. This first call to nextToken() will return 179 * FALSE if the AffixPattern being iterated over is empty. 180 * @return result 181 */ 182 AffixPatternIterator &iterator(AffixPatternIterator &result) const; 183 184 /** 185 * Returns TRUE if this instance has currency tokens in it. 186 */ 187 UBool usesCurrency() const { 188 return hasCurrencyToken; 189 } 190 191 UBool usesPercent() const { 192 return hasPercentToken; 193 } 194 195 UBool usesPermill() const { 196 return hasPermillToken; 197 } 198 199 /** 200 * Returns the number of code points a string of this instance 201 * would have if none of the special tokens were escaped. 202 * Used to compute the padding size. 203 */ 204 int32_t countChar32() const { 205 return char32Count; 206 } 207 208 /** 209 * Appends other to this instance mutating this instance in place. 210 * @param other The pattern appended to the end of this one. 211 * @return a reference to this instance for chaining. 212 */ 213 AffixPattern &append(const AffixPattern &other); 214 215 /** 216 * Converts this AffixPattern back into a user string. 217 * It is the inverse of parseUserAffixString. 218 */ 219 UnicodeString &toUserString(UnicodeString &appendTo) const; 220 221 /** 222 * Converts this AffixPattern back into a string. 223 * It is the inverse of parseAffixString. 224 */ 225 UnicodeString &toString(UnicodeString &appendTo) const; 226 227 /** 228 * Parses an affix pattern string appending it to an AffixPattern. 229 * Parses affix pattern strings produced from using 230 * DecimalFormatPatternParser to parse a format pattern. Affix patterns 231 * include the positive prefix and suffix and the negative prefix 232 * and suffix. This method expects affix patterns strings to be in the 233 * same format that DecimalFormatPatternParser produces. Namely special 234 * characters in the affix that correspond to a field type must be 235 * prefixed with an apostrophe ('). These special character sequences 236 * inluce minus (-), percent (%), permile (U+2030), plus (+), 237 * short currency (U+00a4), medium currency (u+00a4 * 2), 238 * long currency (u+a4 * 3), and apostrophe (') 239 * (apostrophe does not correspond to a field type but has to be escaped 240 * because it itself is the escape character). 241 * Since the expansion of these special character 242 * sequences is locale dependent, these sequences are not expanded in 243 * an AffixPattern instance. 244 * If these special characters are not prefixed with an apostrophe in 245 * the affix pattern string, then they are treated verbatim just as 246 * any other character. If an apostrophe prefixes a non special 247 * character in the affix pattern, the apostrophe is simply ignored. 248 * 249 * @param affixStr the string from DecimalFormatPatternParser 250 * @param appendTo parsed result appended here. 251 * @param status any error parsing returned here. 252 */ 253 static AffixPattern &parseAffixString( 254 const UnicodeString &affixStr, 255 AffixPattern &appendTo, 256 UErrorCode &status); 257 258 /** 259 * Parses an affix pattern string appending it to an AffixPattern. 260 * Parses affix pattern strings as the user would supply them. 261 * In this function, quoting makes special characters like normal 262 * characters whereas in parseAffixString, quoting makes special 263 * characters special. 264 * 265 * @param affixStr the string from the user 266 * @param appendTo parsed result appended here. 267 * @param status any error parsing returned here. 268 */ 269 static AffixPattern &parseUserAffixString( 270 const UnicodeString &affixStr, 271 AffixPattern &appendTo, 272 UErrorCode &status); 273 274 UBool equals(const AffixPattern &other) const { 275 return (tokens == other.tokens) 276 && (literals == other.literals) 277 && (hasCurrencyToken == other.hasCurrencyToken) 278 && (hasPercentToken == other.hasPercentToken) 279 && (hasPermillToken == other.hasPermillToken) 280 && (char32Count == other.char32Count); 281 } 282 283 private: 284 /* 285 * Tokens stored here. Each UChar generally stands for one token. A 286 * Each token is of form 'etttttttllllllll' llllllll is the length of 287 * the token and ranges from 0-255. ttttttt is the token type and ranges 288 * from 0-127. If e is set it means this is an extendo token (to be 289 * described later). To accomodate token lengths above 255, each normal 290 * token (e=0) can be followed by 0 or more extendo tokens (e=1) with 291 * the same type. Right now only kLiteral Tokens have extendo tokens. 292 * Each extendo token provides the next 8 higher bits for the length. 293 * If a kLiteral token is followed by 2 extendo tokens then, then the 294 * llllllll of the next extendo token contains bits 8-15 of the length 295 * and the last extendo token contains bits 16-23 of the length. 296 */ 297 UnicodeString tokens; 298 299 /* 300 * The characters of the kLiteral tokens are concatenated together here. 301 * The first characters go with the first kLiteral token, the next 302 * characters go with the next kLiteral token etc. 303 */ 304 UnicodeString literals; 305 UBool hasCurrencyToken; 306 UBool hasPercentToken; 307 UBool hasPermillToken; 308 int32_t char32Count; 309 void add(ETokenType t, uint8_t count); 310 311 }; 312 313 /** 314 * An iterator over the tokens in an AffixPattern instance. 315 */ 316 class U_I18N_API AffixPatternIterator : public UMemory { 317 public: 318 319 /** 320 * Using an iterator without first calling iterator on an AffixPattern 321 * instance to initialize the iterator results in 322 * undefined behavior. 323 */ 324 AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } 325 /** 326 * Advances this iterator to the next token. Returns FALSE when there 327 * are no more tokens. Calling the other methods after nextToken() 328 * returns FALSE results in undefined behavior. 329 */ 330 UBool nextToken(); 331 332 /** 333 * Returns the type of token. 334 */ 335 AffixPattern::ETokenType getTokenType() const; 336 337 /** 338 * For literal tokens, returns the literal string. Calling this for 339 * other token types results in undefined behavior. 340 * @param result replaced with a read-only alias to the literal string. 341 * @return result 342 */ 343 UnicodeString &getLiteral(UnicodeString &result) const; 344 345 /** 346 * Returns the token length. Usually 1, but for currency tokens may 347 * be 2 for ISO code and 3 for long form. 348 */ 349 int32_t getTokenLength() const; 350 private: 351 int32_t nextLiteralIndex; 352 int32_t lastLiteralLength; 353 int32_t nextTokenIndex; 354 const UnicodeString *tokens; 355 const UnicodeString *literals; 356 friend class AffixPattern; 357 AffixPatternIterator(const AffixPatternIterator &); 358 AffixPatternIterator &operator=(const AffixPatternIterator &); 359 }; 360 361 /** 362 * A locale aware class that converts locale independent AffixPattern 363 * instances into locale dependent PluralAffix instances. 364 */ 365 class U_I18N_API AffixPatternParser : public UMemory { 366 public: 367 AffixPatternParser(); 368 AffixPatternParser(const DecimalFormatSymbols &symbols); 369 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); 370 371 /** 372 * Parses affixPattern appending the result to appendTo. 373 * @param affixPattern The affix pattern. 374 * @param currencyAffixInfo contains the currency forms. 375 * @param appendTo The result of parsing affixPattern is appended here. 376 * @param status any error returned here. 377 * @return appendTo. 378 */ 379 PluralAffix &parse( 380 const AffixPattern &affixPattern, 381 const CurrencyAffixInfo ¤cyAffixInfo, 382 PluralAffix &appendTo, 383 UErrorCode &status) const; 384 385 UBool equals(const AffixPatternParser &other) const { 386 return (fPercent == other.fPercent) 387 && (fPermill == other.fPermill) 388 && (fNegative == other.fNegative) 389 && (fPositive == other.fPositive); 390 } 391 392 private: 393 UnicodeString fPercent; 394 UnicodeString fPermill; 395 UnicodeString fNegative; 396 UnicodeString fPositive; 397 }; 398 399 400 U_NAMESPACE_END 401 #endif /* #if !UCONFIG_NO_FORMATTING */ 402 #endif // __AFFIX_PATTERN_PARSER_H__ 403