1 // 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 #include "numbertest.h" 9 #include "numparse_impl.h" 10 #include "static_unicode_sets.h" 11 #include "unicode/dcfmtsym.h" 12 #include "unicode/testlog.h" 13 14 #include <cmath> 15 #include <numparse_affixes.h> 16 17 using icu::unisets::get; 18 19 void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { 20 if (exec) { 21 logln("TestSuite NumberParserTest: "); 22 } 23 TESTCASE_AUTO_BEGIN; 24 TESTCASE_AUTO(testBasic); 25 TESTCASE_AUTO(testSeriesMatcher); 26 TESTCASE_AUTO(testCombinedCurrencyMatcher); 27 TESTCASE_AUTO(testAffixPatternMatcher); 28 TESTCASE_AUTO_END; 29 } 30 31 void NumberParserTest::testBasic() { 32 IcuTestErrorCode status(*this, "testBasic"); 33 34 static const struct TestCase { 35 int32_t flags; 36 const char16_t* inputString; 37 const char16_t* patternString; 38 int32_t expectedCharsConsumed; 39 double expectedResultDouble; 40 } cases[] = {{3, u"51423", u"0", 5, 51423.}, 41 {3, u"51423x", u"0", 5, 51423.}, 42 {3, u" 51423", u"0", 6, 51423.}, 43 {3, u"51423 ", u"0", 5, 51423.}, 44 {3, u"", u"0", 10, 51423.}, 45 {3, u"x", u"0", 10, 51423.}, 46 {3, u" ", u"0", 11, 51423.}, 47 {3, u" ", u"0", 10, 51423.}, 48 {7, u"51,423", u"#,##,##0", 6, 51423.}, 49 {7, u" 51,423", u"#,##,##0", 7, 51423.}, 50 {7, u"51,423 ", u"#,##,##0", 6, 51423.}, 51 {7, u"51,423,", u"#,##,##0", 6, 51423.}, 52 {7, u"51,423,,", u"#,##,##0", 6, 51423.}, 53 {7, u"51,423.5", u"#,##,##0", 8, 51423.5}, 54 {7, u"51,423.5,", u"#,##,##0", 8, 51423.5}, 55 {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5}, 56 {7, u"51,423.5.", u"#,##,##0", 8, 51423.5}, 57 {7, u"51,423.5..", u"#,##,##0", 8, 51423.5}, 58 {7, u",", u"#,##,##0", 11, 51423.}, 59 {7, u",,,", u"#,##,##0", 19, 78951423.}, 60 {7, u",.", u"#,##,##0", 18, 78951.423}, 61 {7, u",", u"#,##,##0", 11, 78000.}, 62 {7, u",.", u"#,##,##0", 18, 78000.}, 63 {7, u",.", u"#,##,##0", 18, 78000.023}, 64 {7, u"..", u"#,##,##0", 11, 78.}, 65 {7, u"1,", u"#,##,##0", 1, 1.}, 66 {7, u"1,,", u"#,##,##0", 1, 1.}, 67 {7, u"1.,", u"#,##,##0", 2, 1.}, 68 {3, u"1,.", u"#,##,##0", 3, 1.}, 69 {7, u"1..", u"#,##,##0", 2, 1.}, 70 {3, u",1", u"#,##,##0", 2, 1.}, 71 {3, u"1,1", u"#,##,##0", 1, 1.}, 72 {3, u"1,1,", u"#,##,##0", 1, 1.}, 73 {3, u"1,1,,", u"#,##,##0", 1, 1.}, 74 {3, u"1,1,1", u"#,##,##0", 1, 1.}, 75 {3, u"1,1,1,", u"#,##,##0", 1, 1.}, 76 {3, u"1,1,1,,", u"#,##,##0", 1, 1.}, 77 {3, u"-51423", u"0", 6, -51423.}, 78 {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after 79 {3, u"+51423", u"0", 6, 51423.}, 80 {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after 81 {3, u"%51423", u"0", 6, 51423.}, 82 {3, u"51423%", u"0", 6, 51423.}, 83 {3, u"51423%%", u"0", 6, 51423.}, 84 {3, u"51423", u"0", 6, 51423.}, 85 {3, u"51423", u"0", 6, 51423.}, 86 {3, u"51423", u"0", 6, 51423.}, 87 {3, u"", u"0", 1, INFINITY}, 88 {3, u"-", u"0", 2, -INFINITY}, 89 {3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak? 90 {3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak? 91 {3, u"a51423US dollars", u"a0", 16, 51423.}, 92 {3, u"a 51423 US dollars", u"a0", 18, 51423.}, 93 {3, u"514.23 USD", u"0", 10, 514.23}, 94 {3, u"514.23 GBP", u"0", 10, 514.23}, 95 {3, u"a b", u"a0b", 14, 51423.}, 96 {3, u"-a b", u"a0b", 15, -51423.}, 97 {3, u"a - b", u"a0b", 15, -51423.}, 98 {3, u"", u"[0];(0)", 10, 51423.}, 99 {3, u"[", u"[0];(0)", 11, 51423.}, 100 {3, u"]", u"[0];(0)", 11, 51423.}, 101 {3, u"[]", u"[0];(0)", 12, 51423.}, 102 {3, u"(", u"[0];(0)", 11, -51423.}, 103 {3, u")", u"[0];(0)", 11, -51423.}, 104 {3, u"()", u"[0];(0)", 12, -51423.}, 105 {3, u"", u"{0};{0}", 10, 51423.}, 106 {3, u"{", u"{0};{0}", 11, 51423.}, 107 {3, u"}", u"{0};{0}", 11, 51423.}, 108 {3, u"{}", u"{0};{0}", 12, 51423.}, 109 {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number 110 {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b" 111 {3, u".E", u"0", 12, 5142.}, 112 {3, u".E-", u"0", 13, 0.005142}, 113 {3, u".e-", u"0", 13, 0.005142}, 114 {7, u"5,142.50 Canadian dollars", u"#,##,##0 ", 25, 5142.5}, 115 {3, u"a$ b5", u"a b0", 5, 5.0}, 116 {3, u"1.23", u"0;0", 6, 1.23}, 117 {3, u"1.23", u"0;0", 6, -1.23}, 118 {3, u".00", u"0", 3, 0.0}, 119 {3, u" 1,234", u"a0", 35, 1234.}, // should not hang 120 {3, u"NaN", u"0", 3, NAN}, 121 {3, u"NaN E5", u"0", 6, NAN}, 122 {3, u"0", u"0", 1, 0.0}}; 123 124 parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; 125 for (auto& cas : cases) { 126 UnicodeString inputString(cas.inputString); 127 UnicodeString patternString(cas.patternString); 128 LocalPointer<const NumberParserImpl> parser( 129 NumberParserImpl::createSimpleParser( 130 Locale("en"), patternString, parseFlags, status)); 131 if (status.errDataIfFailureAndReset("createSimpleParser() failed")) { 132 continue; 133 } 134 UnicodeString message = 135 UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString(); 136 137 if (0 != (cas.flags & 0x01)) { 138 // Test greedy code path 139 ParsedNumber resultObject; 140 parser->parse(inputString, true, resultObject, status); 141 assertTrue("Greedy Parse failed: " + message, resultObject.success()); 142 assertEquals( 143 "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); 144 assertEquals( 145 "Greedy Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); 146 } 147 148 if (0 != (cas.flags & 0x02)) { 149 // Test slow code path 150 ParsedNumber resultObject; 151 parser->parse(inputString, false, resultObject, status); 152 assertTrue("Non-Greedy Parse failed: " + message, resultObject.success()); 153 assertEquals( 154 "Non-Greedy Parse failed: " + message, 155 cas.expectedCharsConsumed, 156 resultObject.charEnd); 157 assertEquals( 158 "Non-Greedy Parse failed: " + message, 159 cas.expectedResultDouble, 160 resultObject.getDouble()); 161 } 162 163 if (0 != (cas.flags & 0x04)) { 164 // Test with strict separators 165 parser.adoptInstead( 166 NumberParserImpl::createSimpleParser( 167 Locale("en"), 168 patternString, 169 parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE, 170 status)); 171 ParsedNumber resultObject; 172 parser->parse(inputString, true, resultObject, status); 173 assertTrue("Strict Parse failed: " + message, resultObject.success()); 174 assertEquals( 175 "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); 176 assertEquals( 177 "Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble()); 178 } 179 } 180 } 181 182 void NumberParserTest::testSeriesMatcher() { 183 IcuTestErrorCode status(*this, "testSeriesMatcher"); 184 185 DecimalFormatSymbols symbols("en", status); 186 if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) { 187 return; 188 } 189 PlusSignMatcher m0(symbols, false); 190 MinusSignMatcher m1(symbols, false); 191 IgnorablesMatcher m2(unisets::DEFAULT_IGNORABLES); 192 PercentMatcher m3(symbols); 193 IgnorablesMatcher m4(unisets::DEFAULT_IGNORABLES); 194 195 ArraySeriesMatcher::MatcherArray matchers(5); 196 matchers[0] = &m0; 197 matchers[1] = &m1; 198 matchers[2] = &m2; 199 matchers[3] = &m3; 200 matchers[4] = &m4; 201 ArraySeriesMatcher series(matchers, 5); 202 203 assertFalse("", series.smokeTest(StringSegment(u"x", false))); 204 assertFalse("", series.smokeTest(StringSegment(u"-", false))); 205 assertTrue("", series.smokeTest(StringSegment(u"+", false))); 206 207 static const struct TestCase { 208 const char16_t* input; 209 int32_t expectedOffset; 210 bool expectedMaybeMore; 211 } cases[] = {{u"", 0, true}, 212 {u" ", 0, false}, 213 {u"$", 0, false}, 214 {u"+", 0, true}, 215 {u" +", 0, false}, 216 {u"+-", 0, true}, 217 {u"+ -", 0, false}, 218 {u"+- ", 0, true}, 219 {u"+- $", 0, false}, 220 {u"+-%", 3, true}, 221 {u" +- % ", 0, false}, 222 {u"+- % ", 7, true}, 223 {u"+-%$", 3, false}}; 224 225 for (auto& cas : cases) { 226 UnicodeString input(cas.input); 227 228 StringSegment segment(input, false); 229 ParsedNumber result; 230 bool actualMaybeMore = series.match(segment, result, status); 231 int actualOffset = segment.getOffset(); 232 233 assertEquals("'" + input + "'", cas.expectedOffset, actualOffset); 234 assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore); 235 } 236 } 237 238 void NumberParserTest::testCombinedCurrencyMatcher() { 239 IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher"); 240 241 IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); 242 Locale locale = Locale::getEnglish(); 243 244 DecimalFormatSymbols dfs(locale, status); 245 if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) { 246 return; 247 } 248 dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); 249 dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); 250 CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); 251 252 AffixTokenMatcherSetupData affixSetupData = { 253 currencySymbols, {"en", status}, ignorables, "en", 0}; 254 AffixTokenMatcherWarehouse warehouse(&affixSetupData); 255 NumberParseMatcher& matcher = warehouse.currency(status); 256 affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY; 257 AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData); 258 NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status); 259 260 static const struct TestCase { 261 const char16_t* input; 262 const char16_t* expectedCurrencyCode; 263 const char16_t* expectedNoForeignCurrencyCode; 264 } cases[]{{u"", u"", u""}, 265 {u"FOO", u"", u""}, 266 {u"USD", u"USD", u""}, 267 {u"$", u"USD", u""}, 268 {u"US dollars", u"USD", u""}, 269 {u"eu", u"", u""}, 270 {u"euros", u"EUR", u""}, 271 {u"ICU", u"ICU", u"ICU"}, 272 {u"IU$", u"ICU", u"ICU"}}; 273 for (auto& cas : cases) { 274 UnicodeString input(cas.input); 275 276 { 277 StringSegment segment(input, false); 278 ParsedNumber result; 279 matcher.match(segment, result, status); 280 assertEquals( 281 "Parsing " + input, 282 cas.expectedCurrencyCode, 283 result.currencyCode); 284 assertEquals( 285 "Whole string on " + input, 286 cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(), 287 result.charEnd); 288 } 289 { 290 StringSegment segment(input, false); 291 ParsedNumber result; 292 matcherNoForeign.match(segment, result, status); 293 assertEquals( 294 "[no foreign] Parsing " + input, 295 cas.expectedNoForeignCurrencyCode, 296 result.currencyCode); 297 assertEquals( 298 "[no foreign] Whole string on " + input, 299 cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(), 300 result.charEnd); 301 } 302 } 303 } 304 305 void NumberParserTest::testAffixPatternMatcher() { 306 IcuTestErrorCode status(*this, "testAffixPatternMatcher"); 307 Locale locale = Locale::getEnglish(); 308 IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES); 309 310 DecimalFormatSymbols dfs(locale, status); 311 dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status); 312 dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status); 313 CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status); 314 315 AffixTokenMatcherSetupData affixSetupData = { 316 currencySymbols, {"en", status}, ignorables, "en", 0}; 317 AffixTokenMatcherWarehouse warehouse(&affixSetupData); 318 319 static const struct TestCase { 320 bool exactMatch; 321 const char16_t* affixPattern; 322 int32_t expectedMatcherLength; 323 const char16_t* sampleParseableString; 324 } cases[] = {{false, u"-", 1, u"-"}, 325 {false, u"+-%", 5, u"+-%"}, 326 {true, u"+-%", 3, u"+-%"}, 327 {false, u"ab c", 5, u"a bc"}, 328 {true, u"abc", 3, u"abc"}, 329 {false, u"hello-to+this%verylongstring", 59, u"hello-to+this%very USD longstring"}}; 330 331 for (auto& cas : cases) { 332 UnicodeString affixPattern(cas.affixPattern); 333 UnicodeString sampleParseableString(cas.sampleParseableString); 334 int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0; 335 336 bool success; 337 AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern( 338 affixPattern, warehouse, parseFlags, &success, status); 339 if (!status.errDataIfFailureAndReset("Creation should be successful")) { 340 341 // Check that the matcher has the expected number of children 342 assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length()); 343 344 // Check that the matcher works on a sample string 345 StringSegment segment(sampleParseableString, false); 346 ParsedNumber result; 347 matcher.match(segment, result, status); 348 assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd); 349 } 350 } 351 } 352 353 354 #endif 355