Home | History | Annotate | Download | only in i18n
      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 #ifndef __SOURCE_NUMBER_SKELETONS_H__
      8 #define __SOURCE_NUMBER_SKELETONS_H__
      9 
     10 #include "number_types.h"
     11 #include "numparse_types.h"
     12 #include "unicode/ucharstrie.h"
     13 
     14 using icu::numparse::impl::StringSegment;
     15 
     16 U_NAMESPACE_BEGIN namespace number {
     17 namespace impl {
     18 
     19 // Forward-declaration
     20 struct SeenMacroProps;
     21 
     22 // namespace for enums and entrypoint functions
     23 namespace skeleton {
     24 
     25 ///////////////////////////////////////////////////////////////////////////////////////
     26 // NOTE: For an example of how to add a new stem to the number skeleton parser, see: //
     27 // http://bugs.icu-project.org/trac/changeset/41193                                  //
     28 ///////////////////////////////////////////////////////////////////////////////////////
     29 
     30 /**
     31  * While parsing a skeleton, this enum records what type of option we expect to find next.
     32  */
     33 enum ParseState {
     34 
     35     // Section 0: We expect whitespace or a stem, but not an option:
     36 
     37     STATE_NULL,
     38 
     39     // Section 1: We might accept an option, but it is not required:
     40 
     41     STATE_SCIENTIFIC,
     42     STATE_FRACTION_PRECISION,
     43 
     44     // Section 2: An option is required:
     45 
     46     STATE_INCREMENT_PRECISION,
     47     STATE_MEASURE_UNIT,
     48     STATE_PER_MEASURE_UNIT,
     49     STATE_CURRENCY_UNIT,
     50     STATE_INTEGER_WIDTH,
     51     STATE_NUMBERING_SYSTEM,
     52     STATE_SCALE,
     53 };
     54 
     55 /**
     56  * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem
     57  * string literal written in upper snake case.
     58  *
     59  * @see StemToObject
     60  * @see #SERIALIZED_STEM_TRIE
     61  */
     62 enum StemEnum {
     63 
     64     // Section 1: Stems that do not require an option:
     65 
     66     STEM_COMPACT_SHORT,
     67     STEM_COMPACT_LONG,
     68     STEM_SCIENTIFIC,
     69     STEM_ENGINEERING,
     70     STEM_NOTATION_SIMPLE,
     71     STEM_BASE_UNIT,
     72     STEM_PERCENT,
     73     STEM_PERMILLE,
     74     STEM_PRECISION_INTEGER,
     75     STEM_PRECISION_UNLIMITED,
     76     STEM_PRECISION_CURRENCY_STANDARD,
     77     STEM_PRECISION_CURRENCY_CASH,
     78     STEM_ROUNDING_MODE_CEILING,
     79     STEM_ROUNDING_MODE_FLOOR,
     80     STEM_ROUNDING_MODE_DOWN,
     81     STEM_ROUNDING_MODE_UP,
     82     STEM_ROUNDING_MODE_HALF_EVEN,
     83     STEM_ROUNDING_MODE_HALF_DOWN,
     84     STEM_ROUNDING_MODE_HALF_UP,
     85     STEM_ROUNDING_MODE_UNNECESSARY,
     86     STEM_GROUP_OFF,
     87     STEM_GROUP_MIN2,
     88     STEM_GROUP_AUTO,
     89     STEM_GROUP_ON_ALIGNED,
     90     STEM_GROUP_THOUSANDS,
     91     STEM_LATIN,
     92     STEM_UNIT_WIDTH_NARROW,
     93     STEM_UNIT_WIDTH_SHORT,
     94     STEM_UNIT_WIDTH_FULL_NAME,
     95     STEM_UNIT_WIDTH_ISO_CODE,
     96     STEM_UNIT_WIDTH_HIDDEN,
     97     STEM_SIGN_AUTO,
     98     STEM_SIGN_ALWAYS,
     99     STEM_SIGN_NEVER,
    100     STEM_SIGN_ACCOUNTING,
    101     STEM_SIGN_ACCOUNTING_ALWAYS,
    102     STEM_SIGN_EXCEPT_ZERO,
    103     STEM_SIGN_ACCOUNTING_EXCEPT_ZERO,
    104     STEM_DECIMAL_AUTO,
    105     STEM_DECIMAL_ALWAYS,
    106 
    107     // Section 2: Stems that DO require an option:
    108 
    109     STEM_PRECISION_INCREMENT,
    110     STEM_MEASURE_UNIT,
    111     STEM_PER_MEASURE_UNIT,
    112     STEM_CURRENCY,
    113     STEM_INTEGER_WIDTH,
    114     STEM_NUMBERING_SYSTEM,
    115     STEM_SCALE,
    116 };
    117 
    118 /**
    119  * Creates a NumberFormatter corresponding to the given skeleton string.
    120  *
    121  * @param skeletonString
    122  *            A number skeleton string, possibly not in its shortest form.
    123  * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string.
    124  */
    125 UnlocalizedNumberFormatter create(const UnicodeString& skeletonString, UErrorCode& status);
    126 
    127 /**
    128  * Create a skeleton string corresponding to the given NumberFormatter.
    129  *
    130  * @param macros
    131  *            The NumberFormatter options object.
    132  * @return A skeleton string in normalized form.
    133  */
    134 UnicodeString generate(const MacroProps& macros, UErrorCode& status);
    135 
    136 /**
    137  * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop.
    138  *
    139  * Internal: use the create() endpoint instead of this function.
    140  */
    141 MacroProps parseSkeleton(const UnicodeString& skeletonString, UErrorCode& status);
    142 
    143 /**
    144  * Given that the current segment represents a stem, parse it and save the result.
    145  *
    146  * @return The next state after parsing this stem, corresponding to what subset of options to expect.
    147  */
    148 ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen,
    149                      MacroProps& macros, UErrorCode& status);
    150 
    151 /**
    152  * Given that the current segment represents an option, parse it and save the result.
    153  *
    154  * @return The next state after parsing this option, corresponding to what subset of options to
    155  *         expect next.
    156  */
    157 ParseState
    158 parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    159 
    160 } // namespace skeleton
    161 
    162 
    163 /**
    164  * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This
    165  * applies to only the "Section 1" stems, those that are well-defined without an option.
    166  */
    167 namespace stem_to_object {
    168 
    169 Notation notation(skeleton::StemEnum stem);
    170 
    171 MeasureUnit unit(skeleton::StemEnum stem);
    172 
    173 Precision precision(skeleton::StemEnum stem);
    174 
    175 UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem);
    176 
    177 UGroupingStrategy groupingStrategy(skeleton::StemEnum stem);
    178 
    179 UNumberUnitWidth unitWidth(skeleton::StemEnum stem);
    180 
    181 UNumberSignDisplay signDisplay(skeleton::StemEnum stem);
    182 
    183 UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem);
    184 
    185 } // namespace stem_to_object
    186 
    187 /**
    188  * Namespace for utility methods that convert from enums to stem strings. More complex object conversions
    189  * take place in the object_to_stem_string namespace.
    190  */
    191 namespace enum_to_stem_string {
    192 
    193 void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb);
    194 
    195 void groupingStrategy(UGroupingStrategy value, UnicodeString& sb);
    196 
    197 void unitWidth(UNumberUnitWidth value, UnicodeString& sb);
    198 
    199 void signDisplay(UNumberSignDisplay value, UnicodeString& sb);
    200 
    201 void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb);
    202 
    203 } // namespace enum_to_stem_string
    204 
    205 /**
    206  * Namespace for utility methods for processing stems and options that cannot be interpreted literally.
    207  */
    208 namespace blueprint_helpers {
    209 
    210 /** @return Whether we successfully found and parsed an exponent width option. */
    211 bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    212 
    213 void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status);
    214 
    215 /** @return Whether we successfully found and parsed an exponent sign option. */
    216 bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    217 
    218 void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    219 
    220 void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status);
    221 
    222 void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    223 
    224 void generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, UErrorCode& status);
    225 
    226 void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    227 
    228 void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    229 
    230 void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status);
    231 
    232 void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    233 
    234 void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status);
    235 
    236 /** @return Whether we successfully found and parsed a frac-sig option. */
    237 bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    238 
    239 void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    240 
    241 void
    242 generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status);
    243 
    244 void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    245 
    246 void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status);
    247 
    248 void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    249 
    250 void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status);
    251 
    252 void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status);
    253 
    254 void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb,
    255                               UErrorCode& status);
    256 
    257 } // namespace blueprint_helpers
    258 
    259 /**
    260  * Class for utility methods for generating a token corresponding to each macro-prop. Each method
    261  * returns whether or not a token was written to the string builder.
    262  *
    263  * This needs to be a class, not a namespace, so it can be friended.
    264  */
    265 class GeneratorHelpers {
    266   public:
    267     /**
    268      * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given
    269      * StringBuilder.
    270      *
    271      * Internal: use the create() endpoint instead of this function.
    272      */
    273     static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    274 
    275   private:
    276     static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    277 
    278     static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    279 
    280     static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    281 
    282     static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    283 
    284     static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    285 
    286     static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    287 
    288     static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    289 
    290     static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    291 
    292     static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    293 
    294     static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    295 
    296     static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    297 
    298     static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status);
    299 
    300 };
    301 
    302 /**
    303  * Struct for null-checking.
    304  * In Java, we can just check the object reference. In C++, we need a different method.
    305  */
    306 struct SeenMacroProps {
    307     bool notation = false;
    308     bool unit = false;
    309     bool perUnit = false;
    310     bool precision = false;
    311     bool roundingMode = false;
    312     bool grouper = false;
    313     bool padder = false;
    314     bool integerWidth = false;
    315     bool symbols = false;
    316     bool unitWidth = false;
    317     bool sign = false;
    318     bool decimal = false;
    319     bool scale = false;
    320 };
    321 
    322 } // namespace impl
    323 } // namespace number
    324 U_NAMESPACE_END
    325 
    326 #endif //__SOURCE_NUMBER_SKELETONS_H__
    327 #endif /* #if !UCONFIG_NO_FORMATTING */
    328