Home | History | Annotate | Download | only in intltest
// © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 #include "numbertest.h"
      9 #include "numparse_impl.h"
     10 #include "static_unicode_sets.h"
     11 #include "unicode/dcfmtsym.h"
     12 #include "unicode/testlog.h"
     13 
     14 #include <cmath>
     15 #include <numparse_affixes.h>
     16 
     17 using icu::unisets::get;
     18 
// Test-suite dispatcher for NumberParserTest.
//
// When `exec` is set, the suite name is written to the test log. The four test
// methods of this class are then listed through the TESTCASE_AUTO macros.
//
// NOTE(review): TESTCASE_AUTO_BEGIN / TESTCASE_AUTO / TESTCASE_AUTO_END are not
// defined in this file; they presumably use `index`, `exec` and `name` to select
// and run one case per call -- confirm against the test framework headers. The
// parameter names must stay as they are if the macros refer to them.
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
    if (exec) {
        logln("TestSuite NumberParserTest: ");
    }
    TESTCASE_AUTO_BEGIN;
        TESTCASE_AUTO(testBasic);
        TESTCASE_AUTO(testSeriesMatcher);
        TESTCASE_AUTO(testCombinedCurrencyMatcher);
        TESTCASE_AUTO(testAffixPatternMatcher);
    TESTCASE_AUTO_END;
}
     30 
     31 void NumberParserTest::testBasic() {
     32     IcuTestErrorCode status(*this, "testBasic");
     33 
     34     static const struct TestCase {
     35         int32_t flags;
     36         const char16_t* inputString;
     37         const char16_t* patternString;
     38         int32_t expectedCharsConsumed;
     39         double expectedResultDouble;
     40     } cases[] = {{3, u"51423", u"0", 5, 51423.},
     41                  {3, u"51423x", u"0", 5, 51423.},
     42                  {3, u" 51423", u"0", 6, 51423.},
     43                  {3, u"51423 ", u"0", 5, 51423.},
     44                  {3, u"", u"0", 10, 51423.},
     45                  {3, u"x", u"0", 10, 51423.},
     46                  {3, u" ", u"0", 11, 51423.},
     47                  {3, u" ", u"0", 10, 51423.},
     48                  {7, u"51,423", u"#,##,##0", 6, 51423.},
     49                  {7, u" 51,423", u"#,##,##0", 7, 51423.},
     50                  {7, u"51,423 ", u"#,##,##0", 6, 51423.},
     51                  {7, u"51,423,", u"#,##,##0", 6, 51423.},
     52                  {7, u"51,423,,", u"#,##,##0", 6, 51423.},
     53                  {7, u"51,423.5", u"#,##,##0", 8, 51423.5},
     54                  {7, u"51,423.5,", u"#,##,##0", 8, 51423.5},
     55                  {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5},
     56                  {7, u"51,423.5.", u"#,##,##0", 8, 51423.5},
     57                  {7, u"51,423.5..", u"#,##,##0", 8, 51423.5},
     58                  {7, u",", u"#,##,##0", 11, 51423.},
     59                  {7, u",,,", u"#,##,##0", 19, 78951423.},
     60                  {7, u",.", u"#,##,##0", 18, 78951.423},
     61                  {7, u",", u"#,##,##0", 11, 78000.},
     62                  {7, u",.", u"#,##,##0", 18, 78000.},
     63                  {7, u",.", u"#,##,##0", 18, 78000.023},
     64                  {7, u"..", u"#,##,##0", 11, 78.},
     65                  {7, u"1,", u"#,##,##0", 1, 1.},
     66                  {7, u"1,,", u"#,##,##0", 1, 1.},
     67                  {7, u"1.,", u"#,##,##0", 2, 1.},
     68                  {3, u"1,.", u"#,##,##0", 3, 1.},
     69                  {7, u"1..", u"#,##,##0", 2, 1.},
     70                  {3, u",1", u"#,##,##0", 2, 1.},
     71                  {3, u"1,1", u"#,##,##0", 1, 1.},
     72                  {3, u"1,1,", u"#,##,##0", 1, 1.},
     73                  {3, u"1,1,,", u"#,##,##0", 1, 1.},
     74                  {3, u"1,1,1", u"#,##,##0", 1, 1.},
     75                  {3, u"1,1,1,", u"#,##,##0", 1, 1.},
     76                  {3, u"1,1,1,,", u"#,##,##0", 1, 1.},
     77                  {3, u"-51423", u"0", 6, -51423.},
     78                  {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
     79                  {3, u"+51423", u"0", 6, 51423.},
     80                  {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
     81                  {3, u"%51423", u"0", 6, 51423.},
     82                  {3, u"51423%", u"0", 6, 51423.},
     83                  {3, u"51423%%", u"0", 6, 51423.},
     84                  {3, u"51423", u"0", 6, 51423.},
     85                  {3, u"51423", u"0", 6, 51423.},
     86                  {3, u"51423", u"0", 6, 51423.},
     87                  {3, u"", u"0", 1, INFINITY},
     88                  {3, u"-", u"0", 2, -INFINITY},
     89                  {3, u"@@@123  @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
     90                  {3, u"@@@123@@  ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
     91                  {3, u"a51423US dollars", u"a0", 16, 51423.},
     92                  {3, u"a 51423 US dollars", u"a0", 18, 51423.},
     93                  {3, u"514.23 USD", u"0", 10, 514.23},
     94                  {3, u"514.23 GBP", u"0", 10, 514.23},
     95                  {3, u"a  b", u"a0b", 14, 51423.},
     96                  {3, u"-a  b", u"a0b", 15, -51423.},
     97                  {3, u"a - b", u"a0b", 15, -51423.},
     98                  {3, u"", u"[0];(0)", 10, 51423.},
     99                  {3, u"[", u"[0];(0)", 11, 51423.},
    100                  {3, u"]", u"[0];(0)", 11, 51423.},
    101                  {3, u"[]", u"[0];(0)", 12, 51423.},
    102                  {3, u"(", u"[0];(0)", 11, -51423.},
    103                  {3, u")", u"[0];(0)", 11, -51423.},
    104                  {3, u"()", u"[0];(0)", 12, -51423.},
    105                  {3, u"", u"{0};{0}", 10, 51423.},
    106                  {3, u"{", u"{0};{0}", 11, 51423.},
    107                  {3, u"}", u"{0};{0}", 11, 51423.},
    108                  {3, u"{}", u"{0};{0}", 12, 51423.},
    109                  {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
    110                  {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
    111                  {3, u".E", u"0", 12, 5142.},
    112                  {3, u".E-", u"0", 13, 0.005142},
    113                  {3, u".e-", u"0", 13, 0.005142},
    114                  {7, u"5,142.50 Canadian dollars", u"#,##,##0 ", 25, 5142.5},
    115                  {3, u"a$ b5", u"a  b0", 5, 5.0},
    116                  {3, u"1.23", u"0;0", 6, 1.23},
    117                  {3, u"1.23", u"0;0", 6, -1.23},
    118                  {3, u".00", u"0", 3, 0.0},
    119                  {3, u"                              1,234", u"a0", 35, 1234.}, // should not hang
    120                  {3, u"NaN", u"0", 3, NAN},
    121                  {3, u"NaN E5", u"0", 6, NAN},
    122                  {3, u"0", u"0", 1, 0.0}};
    123 
    124     parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
    125     for (auto& cas : cases) {
    126         UnicodeString inputString(cas.inputString);
    127         UnicodeString patternString(cas.patternString);
    128         LocalPointer<const NumberParserImpl> parser(
    129                 NumberParserImpl::createSimpleParser(
    130                         Locale("en"), patternString, parseFlags, status));
    131         if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
    132             continue;
    133         }
    134         UnicodeString message =
    135                 UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();
    136 
    137         if (0 != (cas.flags & 0x01)) {
    138             // Test greedy code path
    139             ParsedNumber resultObject;
    140             parser->parse(inputString, true, resultObject, status);
    141             assertTrue("Greedy Parse failed: " + message, resultObject.success());
    142             assertEquals(
    143                     "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
    144             assertEquals(
    145                     "Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
    146         }
    147 
    148         if (0 != (cas.flags & 0x02)) {
    149             // Test slow code path
    150             ParsedNumber resultObject;
    151             parser->parse(inputString, false, resultObject, status);
    152             assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
    153             assertEquals(
    154                     "Non-Greedy Parse failed: " + message,
    155                     cas.expectedCharsConsumed,
    156                     resultObject.charEnd);
    157             assertEquals(
    158                     "Non-Greedy Parse failed: " + message,
    159                     cas.expectedResultDouble,
    160                     resultObject.getDouble());
    161         }
    162 
    163         if (0 != (cas.flags & 0x04)) {
    164             // Test with strict separators
    165             parser.adoptInstead(
    166                     NumberParserImpl::createSimpleParser(
    167                             Locale("en"),
    168                             patternString,
    169                             parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
    170                             status));
    171             ParsedNumber resultObject;
    172             parser->parse(inputString, true, resultObject, status);
    173             assertTrue("Strict Parse failed: " + message, resultObject.success());
    174             assertEquals(
    175                     "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
    176             assertEquals(
    177                     "Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble());
    178         }
    179     }
    180 }
    181 
    182 void NumberParserTest::testSeriesMatcher() {
    183     IcuTestErrorCode status(*this, "testSeriesMatcher");
    184 
    185     DecimalFormatSymbols symbols("en", status);
    186     if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
    187         return;
    188     }
    189     PlusSignMatcher m0(symbols, false);
    190     MinusSignMatcher m1(symbols, false);
    191     IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES);
    192     PercentMatcher m3(symbols);
    193     IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES);
    194 
    195     ArraySeriesMatcher::MatcherArray matchers(5);
    196     matchers[0] = &m0;
    197     matchers[1] = &m1;
    198     matchers[2] = &m2;
    199     matchers[3] = &m3;
    200     matchers[4] = &m4;
    201     ArraySeriesMatcher series(matchers, 5);
    202 
    203     assertFalse("", series.smokeTest(StringSegment(u"x", false)));
    204     assertFalse("", series.smokeTest(StringSegment(u"-", false)));
    205     assertTrue("", series.smokeTest(StringSegment(u"+", false)));
    206 
    207     static const struct TestCase {
    208         const char16_t* input;
    209         int32_t expectedOffset;
    210         bool expectedMaybeMore;
    211     } cases[] = {{u"", 0, true},
    212                  {u" ", 0, false},
    213                  {u"$", 0, false},
    214                  {u"+", 0, true},
    215                  {u" +", 0, false},
    216                  {u"+-", 0, true},
    217                  {u"+ -", 0, false},
    218                  {u"+-  ", 0, true},
    219                  {u"+-  $", 0, false},
    220                  {u"+-%", 3, true},
    221                  {u"  +-  %  ", 0, false},
    222                  {u"+-  %  ", 7, true},
    223                  {u"+-%$", 3, false}};
    224 
    225     for (auto& cas : cases) {
    226         UnicodeString input(cas.input);
    227 
    228         StringSegment segment(input, false);
    229         ParsedNumber result;
    230         bool actualMaybeMore = series.match(segment, result, status);
    231         int actualOffset = segment.getOffset();
    232 
    233         assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
    234         assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
    235     }
    236 }
    237 
    238 void NumberParserTest::testCombinedCurrencyMatcher() {
    239     IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
    240 
    241     IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
    242     Locale locale = Locale::getEnglish();
    243 
    244     DecimalFormatSymbols dfs(locale, status);
    245     if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
    246         return;
    247     }
    248     dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
    249     dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
    250     CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
    251 
    252     AffixTokenMatcherSetupData affixSetupData = {
    253             currencySymbols, {"en", status}, ignorables, "en", 0};
    254     AffixTokenMatcherWarehouse warehouse(&affixSetupData);
    255     NumberParseMatcher& matcher = warehouse.currency(status);
    256     affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY;
    257     AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData);
    258     NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status);
    259 
    260     static const struct TestCase {
    261         const char16_t* input;
    262         const char16_t* expectedCurrencyCode;
    263         const char16_t* expectedNoForeignCurrencyCode;
    264     } cases[]{{u"", u"", u""},
    265               {u"FOO", u"", u""},
    266               {u"USD", u"USD", u""},
    267               {u"$", u"USD", u""},
    268               {u"US dollars", u"USD", u""},
    269               {u"eu", u"", u""},
    270               {u"euros", u"EUR", u""},
    271               {u"ICU", u"ICU", u"ICU"},
    272               {u"IU$", u"ICU", u"ICU"}};
    273     for (auto& cas : cases) {
    274         UnicodeString input(cas.input);
    275 
    276         {
    277             StringSegment segment(input, false);
    278             ParsedNumber result;
    279             matcher.match(segment, result, status);
    280             assertEquals(
    281                     "Parsing " + input,
    282                     cas.expectedCurrencyCode,
    283                     result.currencyCode);
    284             assertEquals(
    285                     "Whole string on " + input,
    286                     cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
    287                     result.charEnd);
    288         }
    289         {
    290             StringSegment segment(input, false);
    291             ParsedNumber result;
    292             matcherNoForeign.match(segment, result, status);
    293             assertEquals(
    294                     "[no foreign] Parsing " + input,
    295                     cas.expectedNoForeignCurrencyCode,
    296                     result.currencyCode);
    297             assertEquals(
    298                     "[no foreign] Whole string on " + input,
    299                     cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(),
    300                     result.charEnd);
    301         }
    302     }
    303 }
    304 
    305 void NumberParserTest::testAffixPatternMatcher() {
    306     IcuTestErrorCode status(*this, "testAffixPatternMatcher");
    307     Locale locale = Locale::getEnglish();
    308     IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
    309 
    310     DecimalFormatSymbols dfs(locale, status);
    311     dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
    312     dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
    313     CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
    314 
    315     AffixTokenMatcherSetupData affixSetupData = {
    316             currencySymbols, {"en", status}, ignorables, "en", 0};
    317     AffixTokenMatcherWarehouse warehouse(&affixSetupData);
    318 
    319     static const struct TestCase {
    320         bool exactMatch;
    321         const char16_t* affixPattern;
    322         int32_t expectedMatcherLength;
    323         const char16_t* sampleParseableString;
    324     } cases[] = {{false, u"-", 1, u"-"},
    325                  {false, u"+-%", 5, u"+-%"},
    326                  {true, u"+-%", 3, u"+-%"},
    327                  {false, u"ab c", 5, u"a    bc"},
    328                  {true, u"abc", 3, u"abc"},
    329                  {false, u"hello-to+this%verylongstring", 59, u"hello-to+this%very USD longstring"}};
    330 
    331     for (auto& cas : cases) {
    332         UnicodeString affixPattern(cas.affixPattern);
    333         UnicodeString sampleParseableString(cas.sampleParseableString);
    334         int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0;
    335 
    336         bool success;
    337         AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
    338                 affixPattern, warehouse, parseFlags, &success, status);
    339         if (!status.errDataIfFailureAndReset("Creation should be successful")) {
    340 
    341             // Check that the matcher has the expected number of children
    342             assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length());
    343 
    344             // Check that the matcher works on a sample string
    345             StringSegment segment(sampleParseableString, false);
    346             ParsedNumber result;
    347             matcher.match(segment, result, status);
    348             assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd);
    349         }
    350     }
    351 }
    352 
    353 
    354 #endif
    355