/*
*******************************************************************************
* Copyright (C) 2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* affixpatternparser.h
*
* created on: 2015jan06
* created by: Travis Keep
*/

     12 #ifndef __AFFIX_PATTERN_PARSER_H__
     13 #define __AFFIX_PATTERN_PARSER_H__
     14 
     15 #include "unicode/utypes.h"
     16 
     17 #if !UCONFIG_NO_FORMATTING
     18 
     19 #include "unicode/unistr.h"
     20 #include "unicode/uobject.h"
     21 #include "pluralaffix.h"
     22 
     23 U_NAMESPACE_BEGIN
     24 
     25 class PluralRules;
     26 class FixedPrecision;
     27 class DecimalFormatSymbols;
     28 
     29 /**
     30  * A representation of the various forms of a particular currency according
     31  * to some locale and usage context.
     32  *
     33  * Includes the symbol, ISO code form, and long form(s) of the currency name
     34  * for each plural variation.
     35  */
     36 class U_I18N_API CurrencyAffixInfo : public UMemory {
     37 public:
     38     /**
     39      * Symbol is \u00a4; ISO form is \u00a4\u00a4;
     40      *  long form is \u00a4\u00a4\u00a4.
     41      */
     42     CurrencyAffixInfo();
     43 
     44     const UnicodeString &getSymbol() const { return fSymbol; }
     45     const UnicodeString &getISO() const { return fISO; }
     46     const PluralAffix &getLong() const { return fLong; }
     47     void setSymbol(const UnicodeString &symbol) {
     48         fSymbol = symbol;
     49         fIsDefault = FALSE;
     50     }
     51     void setISO(const UnicodeString &iso) {
     52         fISO = iso;
     53         fIsDefault = FALSE;
     54     }
     55     UBool
     56     equals(const CurrencyAffixInfo &other) const {
     57         return (fSymbol == other.fSymbol)
     58                 && (fISO == other.fISO)
     59                 && (fLong.equals(other.fLong))
     60                 && (fIsDefault == other.fIsDefault);
     61     }
     62 
     63     /**
     64      * Intializes this instance.
     65      *
     66      * @param locale the locale for the currency forms.
     67      * @param rules The plural rules for the locale.
     68      * @param currency the null terminated, 3 character ISO code of the
     69      * currency. If NULL, resets this instance as if it were just created.
     70      * In this case, the first 2 parameters may be NULL as well.
     71      * @param status any error returned here.
     72      */
     73     void set(
     74             const char *locale, const PluralRules *rules,
     75             const UChar *currency, UErrorCode &status);
     76 
     77     /**
     78      * Returns true if this instance is the default. That is has no real
     79      * currency. For instance never initialized with set()
     80      * or reset with set(NULL, NULL, NULL, status).
     81      */
     82     UBool isDefault() const { return fIsDefault; }
     83 
     84     /**
     85      * Adjusts the precision used for a particular currency.
     86      * @param currency the null terminated, 3 character ISO code of the
     87      * currency.
     88      * @param usage the usage of the currency
     89      * @param precision min/max fraction digits and rounding increment
     90      *  adjusted.
     91      * @params status any error reported here.
     92      */
     93     static void adjustPrecision(
     94             const UChar *currency, const UCurrencyUsage usage,
     95             FixedPrecision &precision, UErrorCode &status);
     96 
     97 private:
     98     /**
     99      * The symbol form of the currency.
    100      */
    101     UnicodeString fSymbol;
    102 
    103     /**
    104      * The ISO form of the currency, usually three letter abbreviation.
    105      */
    106     UnicodeString fISO;
    107 
    108     /**
    109      * The long forms of the currency keyed by plural variation.
    110      */
    111     PluralAffix fLong;
    112 
    113     UBool fIsDefault;
    114 
    115 };
    116 
    117 class AffixPatternIterator;
    118 
    119 /**
    120  * A locale agnostic representation of an affix pattern.
    121  */
    122 class U_I18N_API AffixPattern : public UMemory {
    123 public:
    124 
    125     /**
    126      * The token types that can appear in an affix pattern.
    127      */
    128     enum ETokenType {
    129         kLiteral,
    130         kPercent,
    131         kPerMill,
    132         kCurrency,
    133         kNegative,
    134         kPositive
    135     };
    136 
    137     /**
    138      * An empty affix pattern.
    139      */
    140     AffixPattern()
    141             : tokens(), literals(), hasCurrencyToken(FALSE),
    142               hasPercentToken(FALSE), hasPermillToken(FALSE),  char32Count(0) {
    143     }
    144 
    145     /**
    146      * Adds a string literal to this affix pattern.
    147      */
    148     void addLiteral(const UChar *, int32_t start, int32_t len);
    149 
    150     /**
    151      * Adds a token to this affix pattern. t must not be kLiteral as
    152      * the addLiteral() method adds literals.
    153      * @param t the token type to add
    154      */
    155     void add(ETokenType t);
    156 
    157     /**
    158      * Adds a currency token with specific count to this affix pattern.
    159      * @param count the token count. Used to distinguish between
    160      *  one, two, or three currency symbols. Note that adding a currency
    161      *  token with count=2 (Use ISO code) is different than adding two
    162      *  currency tokens each with count=1 (two currency symbols).
    163      */
    164     void addCurrency(uint8_t count);
    165 
    166     /**
    167      * Makes this instance be an empty affix pattern.
    168      */
    169     void remove();
    170 
    171     /**
    172      * Provides an iterator over the tokens in this instance.
    173      * @param result this is initialized to point just before the
    174      *   first token of this instance. Caller must call nextToken()
    175      *   on the iterator once it is set up to have it actually point
    176      *   to the first token. This first call to nextToken() will return
    177      *   FALSE if the AffixPattern being iterated over is empty.
    178      * @return result
    179      */
    180     AffixPatternIterator &iterator(AffixPatternIterator &result) const;
    181 
    182     /**
    183      * Returns TRUE if this instance has currency tokens in it.
    184      */
    185     UBool usesCurrency() const {
    186         return hasCurrencyToken;
    187     }
    188 
    189     UBool usesPercent() const {
    190         return hasPercentToken;
    191     }
    192 
    193     UBool usesPermill() const {
    194         return hasPermillToken;
    195     }
    196 
    197     /**
    198      * Returns the number of code points a string of this instance
    199      * would have if none of the special tokens were escaped.
    200      * Used to compute the padding size.
    201      */
    202     int32_t countChar32() const {
    203         return char32Count;
    204     }
    205 
    206     /**
    207      * Appends other to this instance mutating this instance in place.
    208      * @param other The pattern appended to the end of this one.
    209      * @return a reference to this instance for chaining.
    210      */
    211     AffixPattern &append(const AffixPattern &other);
    212 
    213     /**
    214      * Converts this AffixPattern back into a user string.
    215      * It is the inverse of parseUserAffixString.
    216      */
    217     UnicodeString &toUserString(UnicodeString &appendTo) const;
    218 
    219     /**
    220      * Converts this AffixPattern back into a string.
    221      * It is the inverse of parseAffixString.
    222      */
    223     UnicodeString &toString(UnicodeString &appendTo) const;
    224 
    225     /**
    226      * Parses an affix pattern string appending it to an AffixPattern.
    227      * Parses affix pattern strings produced from using
    228      * DecimalFormatPatternParser to parse a format pattern. Affix patterns
    229      * include the positive prefix and suffix and the negative prefix
    230      * and suffix. This method expects affix patterns strings to be in the
    231      * same format that DecimalFormatPatternParser produces. Namely special
    232      * characters in the affix that correspond to a field type must be
    233      * prefixed with an apostrophe ('). These special character sequences
    234      * inluce minus (-), percent (%), permile (U+2030), plus (+),
    235      * short currency (U+00a4), medium currency (u+00a4 * 2),
    236      * long currency (u+a4 * 3), and apostrophe (')
    237      * (apostrophe does not correspond to a field type but has to be escaped
    238      * because it itself is the escape character).
    239      * Since the expansion of these special character
    240      * sequences is locale dependent, these sequences are not expanded in
    241      * an AffixPattern instance.
    242      * If these special characters are not prefixed with an apostrophe in
    243      * the affix pattern string, then they are treated verbatim just as
    244      * any other character. If an apostrophe prefixes a non special
    245      * character in the affix pattern, the apostrophe is simply ignored.
    246      *
    247      * @param affixStr the string from DecimalFormatPatternParser
    248      * @param appendTo parsed result appended here.
    249      * @param status any error parsing returned here.
    250      */
    251     static AffixPattern &parseAffixString(
    252             const UnicodeString &affixStr,
    253             AffixPattern &appendTo,
    254             UErrorCode &status);
    255 
    256     /**
    257      * Parses an affix pattern string appending it to an AffixPattern.
    258      * Parses affix pattern strings as the user would supply them.
    259      * In this function, quoting makes special characters like normal
    260      * characters whereas in parseAffixString, quoting makes special
    261      * characters special.
    262      *
    263      * @param affixStr the string from the user
    264      * @param appendTo parsed result appended here.
    265      * @param status any error parsing returned here.
    266      */
    267     static AffixPattern &parseUserAffixString(
    268             const UnicodeString &affixStr,
    269             AffixPattern &appendTo,
    270             UErrorCode &status);
    271 
    272     UBool equals(const AffixPattern &other) const {
    273         return (tokens == other.tokens)
    274                 && (literals == other.literals)
    275                 && (hasCurrencyToken == other.hasCurrencyToken)
    276                 && (hasPercentToken == other.hasPercentToken)
    277                 && (hasPermillToken == other.hasPermillToken)
    278                 && (char32Count == other.char32Count);
    279     }
    280 
    281 private:
    282     /*
    283      * Tokens stored here. Each UChar generally stands for one token. A
    284      * Each token is of form 'etttttttllllllll' llllllll is the length of
    285      * the token and ranges from 0-255. ttttttt is the token type and ranges
    286      * from 0-127. If e is set it means this is an extendo token (to be
    287      * described later). To accomodate token lengths above 255, each normal
    288      * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
    289      * the same type. Right now only kLiteral Tokens have extendo tokens.
    290      * Each extendo token provides the next 8 higher bits for the length.
    291      * If a kLiteral token is followed by 2 extendo tokens then, then the
    292      * llllllll of the next extendo token contains bits 8-15 of the length
    293      * and the last extendo token contains bits 16-23 of the length.
    294      */
    295     UnicodeString tokens;
    296 
    297     /*
    298      * The characters of the kLiteral tokens are concatenated together here.
    299      * The first characters go with the first kLiteral token, the next
    300      * characters go with the next kLiteral token etc.
    301      */
    302     UnicodeString literals;
    303     UBool hasCurrencyToken;
    304     UBool hasPercentToken;
    305     UBool hasPermillToken;
    306     int32_t char32Count;
    307     void add(ETokenType t, uint8_t count);
    308 
    309 };
    310 
    311 /**
    312  * An iterator over the tokens in an AffixPattern instance.
    313  */
    314 class U_I18N_API AffixPatternIterator : public UMemory {
    315 public:
    316 
    317     /**
    318      * Using an iterator without first calling iterator on an AffixPattern
    319      * instance to initialize the iterator results in
    320      * undefined behavior.
    321      */
    322     AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
    323     /**
    324      * Advances this iterator to the next token. Returns FALSE when there
    325      * are no more tokens. Calling the other methods after nextToken()
    326      * returns FALSE results in undefined behavior.
    327      */
    328     UBool nextToken();
    329 
    330     /**
    331      * Returns the type of token.
    332      */
    333     AffixPattern::ETokenType getTokenType() const;
    334 
    335     /**
    336      * For literal tokens, returns the literal string. Calling this for
    337      * other token types results in undefined behavior.
    338      * @param result replaced with a read-only alias to the literal string.
    339      * @return result
    340      */
    341     UnicodeString &getLiteral(UnicodeString &result) const;
    342 
    343     /**
    344      * Returns the token length. Usually 1, but for currency tokens may
    345      * be 2 for ISO code and 3 for long form.
    346      */
    347     int32_t getTokenLength() const;
    348 private:
    349     int32_t nextLiteralIndex;
    350     int32_t lastLiteralLength;
    351     int32_t nextTokenIndex;
    352     const UnicodeString *tokens;
    353     const UnicodeString *literals;
    354     friend class AffixPattern;
    355     AffixPatternIterator(const AffixPatternIterator &);
    356     AffixPatternIterator &operator=(const AffixPatternIterator &);
    357 };
    358 
    359 /**
    360  * A locale aware class that converts locale independent AffixPattern
    361  * instances into locale dependent PluralAffix instances.
    362  */
    363 class U_I18N_API AffixPatternParser : public UMemory {
    364 public:
    365 AffixPatternParser();
    366 AffixPatternParser(const DecimalFormatSymbols &symbols);
    367 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
    368 
    369 /**
    370  * Parses affixPattern appending the result to appendTo.
    371  * @param affixPattern The affix pattern.
    372  * @param currencyAffixInfo contains the currency forms.
    373  * @param appendTo The result of parsing affixPattern is appended here.
    374  * @param status any error returned here.
    375  * @return appendTo.
    376  */
    377 PluralAffix &parse(
    378         const AffixPattern &affixPattern,
    379         const CurrencyAffixInfo &currencyAffixInfo,
    380         PluralAffix &appendTo,
    381         UErrorCode &status) const;
    382 
    383 UBool equals(const AffixPatternParser &other) const {
    384     return (fPercent == other.fPercent)
    385             && (fPermill == other.fPermill)
    386             && (fNegative == other.fNegative)
    387             && (fPositive == other.fPositive);
    388 }
    389 
    390 private:
    391 UnicodeString fPercent;
    392 UnicodeString fPermill;
    393 UnicodeString fNegative;
    394 UnicodeString fPositive;
    395 };
    396 
    397 
    398 U_NAMESPACE_END
    399 #endif /* #if !UCONFIG_NO_FORMATTING */
    400 #endif  // __AFFIX_PATTERN_PARSER_H__
    401