Home | History | Annotate | Download | only in i18n
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 * Copyright (C) 2015, International Business Machines
      6 * Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 * affixpatternparser.h
      9 *
     10 * created on: 2015jan06
     11 * created by: Travis Keep
     12 */
     13 
     14 #ifndef __AFFIX_PATTERN_PARSER_H__
     15 #define __AFFIX_PATTERN_PARSER_H__
     16 
     17 #include "unicode/utypes.h"
     18 
     19 #if !UCONFIG_NO_FORMATTING
     20 
     21 #include "unicode/unistr.h"
     22 #include "unicode/uobject.h"
     23 #include "pluralaffix.h"
     24 
     25 U_NAMESPACE_BEGIN
     26 
     27 class PluralRules;
     28 class FixedPrecision;
     29 class DecimalFormatSymbols;
     30 
     31 /**
     32  * A representation of the various forms of a particular currency according
     33  * to some locale and usage context.
     34  *
     35  * Includes the symbol, ISO code form, and long form(s) of the currency name
     36  * for each plural variation.
     37  */
     38 class U_I18N_API CurrencyAffixInfo : public UMemory {
     39 public:
     40     /**
     41      * Symbol is \u00a4; ISO form is \u00a4\u00a4;
     42      *  long form is \u00a4\u00a4\u00a4.
     43      */
     44     CurrencyAffixInfo();
     45 
     46     const UnicodeString &getSymbol() const { return fSymbol; }
     47     const UnicodeString &getISO() const { return fISO; }
     48     const PluralAffix &getLong() const { return fLong; }
     49     void setSymbol(const UnicodeString &symbol) {
     50         fSymbol = symbol;
     51         fIsDefault = FALSE;
     52     }
     53     void setISO(const UnicodeString &iso) {
     54         fISO = iso;
     55         fIsDefault = FALSE;
     56     }
     57     UBool
     58     equals(const CurrencyAffixInfo &other) const {
     59         return (fSymbol == other.fSymbol)
     60                 && (fISO == other.fISO)
     61                 && (fLong.equals(other.fLong))
     62                 && (fIsDefault == other.fIsDefault);
     63     }
     64 
     65     /**
     66      * Intializes this instance.
     67      *
     68      * @param locale the locale for the currency forms.
     69      * @param rules The plural rules for the locale.
     70      * @param currency the null terminated, 3 character ISO code of the
     71      * currency. If NULL, resets this instance as if it were just created.
     72      * In this case, the first 2 parameters may be NULL as well.
     73      * @param status any error returned here.
     74      */
     75     void set(
     76             const char *locale, const PluralRules *rules,
     77             const UChar *currency, UErrorCode &status);
     78 
     79     /**
     80      * Returns true if this instance is the default. That is has no real
     81      * currency. For instance never initialized with set()
     82      * or reset with set(NULL, NULL, NULL, status).
     83      */
     84     UBool isDefault() const { return fIsDefault; }
     85 
     86     /**
     87      * Adjusts the precision used for a particular currency.
     88      * @param currency the null terminated, 3 character ISO code of the
     89      * currency.
     90      * @param usage the usage of the currency
     91      * @param precision min/max fraction digits and rounding increment
     92      *  adjusted.
     93      * @params status any error reported here.
     94      */
     95     static void adjustPrecision(
     96             const UChar *currency, const UCurrencyUsage usage,
     97             FixedPrecision &precision, UErrorCode &status);
     98 
     99 private:
    100     /**
    101      * The symbol form of the currency.
    102      */
    103     UnicodeString fSymbol;
    104 
    105     /**
    106      * The ISO form of the currency, usually three letter abbreviation.
    107      */
    108     UnicodeString fISO;
    109 
    110     /**
    111      * The long forms of the currency keyed by plural variation.
    112      */
    113     PluralAffix fLong;
    114 
    115     UBool fIsDefault;
    116 
    117 };
    118 
    119 class AffixPatternIterator;
    120 
    121 /**
    122  * A locale agnostic representation of an affix pattern.
    123  */
    124 class U_I18N_API AffixPattern : public UMemory {
    125 public:
    126 
    127     /**
    128      * The token types that can appear in an affix pattern.
    129      */
    130     enum ETokenType {
    131         kLiteral,
    132         kPercent,
    133         kPerMill,
    134         kCurrency,
    135         kNegative,
    136         kPositive
    137     };
    138 
    139     /**
    140      * An empty affix pattern.
    141      */
    142     AffixPattern()
    143             : tokens(), literals(), hasCurrencyToken(FALSE),
    144               hasPercentToken(FALSE), hasPermillToken(FALSE),  char32Count(0) {
    145     }
    146 
    147     /**
    148      * Adds a string literal to this affix pattern.
    149      */
    150     void addLiteral(const UChar *, int32_t start, int32_t len);
    151 
    152     /**
    153      * Adds a token to this affix pattern. t must not be kLiteral as
    154      * the addLiteral() method adds literals.
    155      * @param t the token type to add
    156      */
    157     void add(ETokenType t);
    158 
    159     /**
    160      * Adds a currency token with specific count to this affix pattern.
    161      * @param count the token count. Used to distinguish between
    162      *  one, two, or three currency symbols. Note that adding a currency
    163      *  token with count=2 (Use ISO code) is different than adding two
    164      *  currency tokens each with count=1 (two currency symbols).
    165      */
    166     void addCurrency(uint8_t count);
    167 
    168     /**
    169      * Makes this instance be an empty affix pattern.
    170      */
    171     void remove();
    172 
    173     /**
    174      * Provides an iterator over the tokens in this instance.
    175      * @param result this is initialized to point just before the
    176      *   first token of this instance. Caller must call nextToken()
    177      *   on the iterator once it is set up to have it actually point
    178      *   to the first token. This first call to nextToken() will return
    179      *   FALSE if the AffixPattern being iterated over is empty.
    180      * @return result
    181      */
    182     AffixPatternIterator &iterator(AffixPatternIterator &result) const;
    183 
    184     /**
    185      * Returns TRUE if this instance has currency tokens in it.
    186      */
    187     UBool usesCurrency() const {
    188         return hasCurrencyToken;
    189     }
    190 
    191     UBool usesPercent() const {
    192         return hasPercentToken;
    193     }
    194 
    195     UBool usesPermill() const {
    196         return hasPermillToken;
    197     }
    198 
    199     /**
    200      * Returns the number of code points a string of this instance
    201      * would have if none of the special tokens were escaped.
    202      * Used to compute the padding size.
    203      */
    204     int32_t countChar32() const {
    205         return char32Count;
    206     }
    207 
    208     /**
    209      * Appends other to this instance mutating this instance in place.
    210      * @param other The pattern appended to the end of this one.
    211      * @return a reference to this instance for chaining.
    212      */
    213     AffixPattern &append(const AffixPattern &other);
    214 
    215     /**
    216      * Converts this AffixPattern back into a user string.
    217      * It is the inverse of parseUserAffixString.
    218      */
    219     UnicodeString &toUserString(UnicodeString &appendTo) const;
    220 
    221     /**
    222      * Converts this AffixPattern back into a string.
    223      * It is the inverse of parseAffixString.
    224      */
    225     UnicodeString &toString(UnicodeString &appendTo) const;
    226 
    227     /**
    228      * Parses an affix pattern string appending it to an AffixPattern.
    229      * Parses affix pattern strings produced from using
    230      * DecimalFormatPatternParser to parse a format pattern. Affix patterns
    231      * include the positive prefix and suffix and the negative prefix
    232      * and suffix. This method expects affix patterns strings to be in the
    233      * same format that DecimalFormatPatternParser produces. Namely special
    234      * characters in the affix that correspond to a field type must be
    235      * prefixed with an apostrophe ('). These special character sequences
    236      * inluce minus (-), percent (%), permile (U+2030), plus (+),
    237      * short currency (U+00a4), medium currency (u+00a4 * 2),
    238      * long currency (u+a4 * 3), and apostrophe (')
    239      * (apostrophe does not correspond to a field type but has to be escaped
    240      * because it itself is the escape character).
    241      * Since the expansion of these special character
    242      * sequences is locale dependent, these sequences are not expanded in
    243      * an AffixPattern instance.
    244      * If these special characters are not prefixed with an apostrophe in
    245      * the affix pattern string, then they are treated verbatim just as
    246      * any other character. If an apostrophe prefixes a non special
    247      * character in the affix pattern, the apostrophe is simply ignored.
    248      *
    249      * @param affixStr the string from DecimalFormatPatternParser
    250      * @param appendTo parsed result appended here.
    251      * @param status any error parsing returned here.
    252      */
    253     static AffixPattern &parseAffixString(
    254             const UnicodeString &affixStr,
    255             AffixPattern &appendTo,
    256             UErrorCode &status);
    257 
    258     /**
    259      * Parses an affix pattern string appending it to an AffixPattern.
    260      * Parses affix pattern strings as the user would supply them.
    261      * In this function, quoting makes special characters like normal
    262      * characters whereas in parseAffixString, quoting makes special
    263      * characters special.
    264      *
    265      * @param affixStr the string from the user
    266      * @param appendTo parsed result appended here.
    267      * @param status any error parsing returned here.
    268      */
    269     static AffixPattern &parseUserAffixString(
    270             const UnicodeString &affixStr,
    271             AffixPattern &appendTo,
    272             UErrorCode &status);
    273 
    274     UBool equals(const AffixPattern &other) const {
    275         return (tokens == other.tokens)
    276                 && (literals == other.literals)
    277                 && (hasCurrencyToken == other.hasCurrencyToken)
    278                 && (hasPercentToken == other.hasPercentToken)
    279                 && (hasPermillToken == other.hasPermillToken)
    280                 && (char32Count == other.char32Count);
    281     }
    282 
    283 private:
    284     /*
    285      * Tokens stored here. Each UChar generally stands for one token. A
    286      * Each token is of form 'etttttttllllllll' llllllll is the length of
    287      * the token and ranges from 0-255. ttttttt is the token type and ranges
    288      * from 0-127. If e is set it means this is an extendo token (to be
    289      * described later). To accomodate token lengths above 255, each normal
    290      * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
    291      * the same type. Right now only kLiteral Tokens have extendo tokens.
    292      * Each extendo token provides the next 8 higher bits for the length.
    293      * If a kLiteral token is followed by 2 extendo tokens then, then the
    294      * llllllll of the next extendo token contains bits 8-15 of the length
    295      * and the last extendo token contains bits 16-23 of the length.
    296      */
    297     UnicodeString tokens;
    298 
    299     /*
    300      * The characters of the kLiteral tokens are concatenated together here.
    301      * The first characters go with the first kLiteral token, the next
    302      * characters go with the next kLiteral token etc.
    303      */
    304     UnicodeString literals;
    305     UBool hasCurrencyToken;
    306     UBool hasPercentToken;
    307     UBool hasPermillToken;
    308     int32_t char32Count;
    309     void add(ETokenType t, uint8_t count);
    310 
    311 };
    312 
    313 /**
    314  * An iterator over the tokens in an AffixPattern instance.
    315  */
    316 class U_I18N_API AffixPatternIterator : public UMemory {
    317 public:
    318 
    319     /**
    320      * Using an iterator without first calling iterator on an AffixPattern
    321      * instance to initialize the iterator results in
    322      * undefined behavior.
    323      */
    324     AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
    325     /**
    326      * Advances this iterator to the next token. Returns FALSE when there
    327      * are no more tokens. Calling the other methods after nextToken()
    328      * returns FALSE results in undefined behavior.
    329      */
    330     UBool nextToken();
    331 
    332     /**
    333      * Returns the type of token.
    334      */
    335     AffixPattern::ETokenType getTokenType() const;
    336 
    337     /**
    338      * For literal tokens, returns the literal string. Calling this for
    339      * other token types results in undefined behavior.
    340      * @param result replaced with a read-only alias to the literal string.
    341      * @return result
    342      */
    343     UnicodeString &getLiteral(UnicodeString &result) const;
    344 
    345     /**
    346      * Returns the token length. Usually 1, but for currency tokens may
    347      * be 2 for ISO code and 3 for long form.
    348      */
    349     int32_t getTokenLength() const;
    350 private:
    351     int32_t nextLiteralIndex;
    352     int32_t lastLiteralLength;
    353     int32_t nextTokenIndex;
    354     const UnicodeString *tokens;
    355     const UnicodeString *literals;
    356     friend class AffixPattern;
    357     AffixPatternIterator(const AffixPatternIterator &);
    358     AffixPatternIterator &operator=(const AffixPatternIterator &);
    359 };
    360 
    361 /**
    362  * A locale aware class that converts locale independent AffixPattern
    363  * instances into locale dependent PluralAffix instances.
    364  */
    365 class U_I18N_API AffixPatternParser : public UMemory {
    366 public:
    367 AffixPatternParser();
    368 AffixPatternParser(const DecimalFormatSymbols &symbols);
    369 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
    370 
    371 /**
    372  * Parses affixPattern appending the result to appendTo.
    373  * @param affixPattern The affix pattern.
    374  * @param currencyAffixInfo contains the currency forms.
    375  * @param appendTo The result of parsing affixPattern is appended here.
    376  * @param status any error returned here.
    377  * @return appendTo.
    378  */
    379 PluralAffix &parse(
    380         const AffixPattern &affixPattern,
    381         const CurrencyAffixInfo &currencyAffixInfo,
    382         PluralAffix &appendTo,
    383         UErrorCode &status) const;
    384 
    385 UBool equals(const AffixPatternParser &other) const {
    386     return (fPercent == other.fPercent)
    387             && (fPermill == other.fPermill)
    388             && (fNegative == other.fNegative)
    389             && (fPositive == other.fPositive);
    390 }
    391 
    392 private:
    393 UnicodeString fPercent;
    394 UnicodeString fPermill;
    395 UnicodeString fNegative;
    396 UnicodeString fPositive;
    397 };
    398 
    399 
    400 U_NAMESPACE_END
    401 #endif /* #if !UCONFIG_NO_FORMATTING */
    402 #endif  // __AFFIX_PATTERN_PARSER_H__
    403