1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File uscnnf_p.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 12/02/98 stephen Creation. 15 * 03/13/99 stephen Modified for new C API. 16 ******************************************************************************* 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_FORMATTING 22 23 #include "unicode/uchar.h" 24 #include "unicode/ustring.h" 25 #include "unicode/unum.h" 26 #include "unicode/udat.h" 27 #include "unicode/uset.h" 28 #include "uscanf.h" 29 #include "ufmt_cmn.h" 30 #include "ufile.h" 31 #include "locbund.h" 32 33 #include "cmemory.h" 34 #include "ustr_cnv.h" 35 36 /* flag characters for u_scanf */ 37 #define FLAG_ASTERISK 0x002A 38 #define FLAG_PAREN 0x0028 39 40 #define ISFLAG(s) (s) == FLAG_ASTERISK || \ 41 (s) == FLAG_PAREN 42 43 /* special characters for u_scanf */ 44 #define SPEC_DOLLARSIGN 0x0024 45 46 /* unicode digits */ 47 #define DIGIT_ZERO 0x0030 48 #define DIGIT_ONE 0x0031 49 #define DIGIT_TWO 0x0032 50 #define DIGIT_THREE 0x0033 51 #define DIGIT_FOUR 0x0034 52 #define DIGIT_FIVE 0x0035 53 #define DIGIT_SIX 0x0036 54 #define DIGIT_SEVEN 0x0037 55 #define DIGIT_EIGHT 0x0038 56 #define DIGIT_NINE 0x0039 57 58 #define ISDIGIT(s) (s) == DIGIT_ZERO || \ 59 (s) == DIGIT_ONE || \ 60 (s) == DIGIT_TWO || \ 61 (s) == DIGIT_THREE || \ 62 (s) == DIGIT_FOUR || \ 63 (s) == DIGIT_FIVE || \ 64 (s) == DIGIT_SIX || \ 65 (s) == DIGIT_SEVEN || \ 66 (s) == DIGIT_EIGHT || \ 67 (s) == DIGIT_NINE 68 69 /* u_scanf modifiers */ 70 #define MOD_H 0x0068 71 #define MOD_LOWERL 0x006C 72 #define MOD_L 0x004C 73 74 #define ISMOD(s) (s) == MOD_H || \ 75 (s) == MOD_LOWERL || \ 76 (s) == MOD_L 77 78 /** 79 * Struct encapsulating a single uscanf format specification. 80 */ 81 typedef struct u_scanf_spec_info { 82 int32_t fWidth; /* Width */ 83 84 UChar fSpec; /* Format specification */ 85 86 UChar fPadChar; /* Padding character */ 87 88 UBool fSkipArg; /* TRUE if arg should be skipped */ 89 UBool fIsLongDouble; /* L flag */ 90 UBool fIsShort; /* h flag */ 91 UBool fIsLong; /* l flag */ 92 UBool fIsLongLong; /* ll flag */ 93 UBool fIsString; /* TRUE if this is a NULL-terminated string. */ 94 } u_scanf_spec_info; 95 96 97 /** 98 * Struct encapsulating a single u_scanf format specification. 99 */ 100 typedef struct u_scanf_spec { 101 u_scanf_spec_info fInfo; /* Information on this spec */ 102 int32_t fArgPos; /* Position of data in arg list */ 103 } u_scanf_spec; 104 105 /** 106 * Parse a single u_scanf format specifier in Unicode. 107 * @param fmt A pointer to a '%' character in a u_scanf format specification. 108 * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed 109 * format specifier. 110 * @return The number of characters contained in this specifier. 111 */ 112 static int32_t 113 u_scanf_parse_spec (const UChar *fmt, 114 u_scanf_spec *spec) 115 { 116 const UChar *s = fmt; 117 const UChar *backup; 118 u_scanf_spec_info *info = &(spec->fInfo); 119 120 /* initialize spec to default values */ 121 spec->fArgPos = -1; 122 123 info->fWidth = -1; 124 info->fSpec = 0x0000; 125 info->fPadChar = 0x0020; 126 info->fSkipArg = FALSE; 127 info->fIsLongDouble = FALSE; 128 info->fIsShort = FALSE; 129 info->fIsLong = FALSE; 130 info->fIsLongLong = FALSE; 131 info->fIsString = TRUE; 132 133 134 /* skip over the initial '%' */ 135 s++; 136 137 /* Check for positional argument */ 138 if(ISDIGIT(*s)) { 139 140 /* Save the current position */ 141 backup = s; 142 143 /* handle positional parameters */ 144 if(ISDIGIT(*s)) { 145 spec->fArgPos = (int) (*s++ - DIGIT_ZERO); 146 147 while(ISDIGIT(*s)) { 148 spec->fArgPos *= 10; 149 spec->fArgPos += (int) (*s++ - DIGIT_ZERO); 150 } 151 } 152 153 /* if there is no '$', don't read anything */ 154 if(*s != SPEC_DOLLARSIGN) { 155 spec->fArgPos = -1; 156 s = backup; 157 } 158 /* munge the '$' */ 159 else 160 s++; 161 } 162 163 /* Get any format flags */ 164 while(ISFLAG(*s)) { 165 switch(*s++) { 166 167 /* skip argument */ 168 case FLAG_ASTERISK: 169 info->fSkipArg = TRUE; 170 break; 171 172 /* pad character specified */ 173 case FLAG_PAREN: 174 175 /* first four characters are hex values for pad char */ 176 info->fPadChar = (UChar)ufmt_digitvalue(*s++); 177 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 178 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 179 info->fPadChar = (UChar)((info->fPadChar * 16) + ufmt_digitvalue(*s++)); 180 181 /* final character is ignored */ 182 s++; 183 184 break; 185 } 186 } 187 188 /* Get the width */ 189 if(ISDIGIT(*s)){ 190 info->fWidth = (int) (*s++ - DIGIT_ZERO); 191 192 while(ISDIGIT(*s)) { 193 info->fWidth *= 10; 194 info->fWidth += (int) (*s++ - DIGIT_ZERO); 195 } 196 } 197 198 /* Get any modifiers */ 199 if(ISMOD(*s)) { 200 switch(*s++) { 201 202 /* short */ 203 case MOD_H: 204 info->fIsShort = TRUE; 205 break; 206 207 /* long or long long */ 208 case MOD_LOWERL: 209 if(*s == MOD_LOWERL) { 210 info->fIsLongLong = TRUE; 211 /* skip over the next 'l' */ 212 s++; 213 } 214 else 215 info->fIsLong = TRUE; 216 break; 217 218 /* long double */ 219 case MOD_L: 220 info->fIsLongDouble = TRUE; 221 break; 222 } 223 } 224 225 /* finally, get the specifier letter */ 226 info->fSpec = *s++; 227 228 /* return # of characters in this specifier */ 229 return (int32_t)(s - fmt); 230 } 231 232 #define UP_PERCENT 0x0025 233 234 235 /* ANSI style formatting */ 236 /* Use US-ASCII characters only for formatting */ 237 238 /* % */ 239 #define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} 240 /* s */ 241 #define UFMT_STRING {ufmt_string, u_scanf_string_handler} 242 /* c */ 243 #define UFMT_CHAR {ufmt_string, u_scanf_char_handler} 244 /* d, i */ 245 #define UFMT_INT {ufmt_int, u_scanf_integer_handler} 246 /* u */ 247 #define UFMT_UINT {ufmt_int, u_scanf_uinteger_handler} 248 /* o */ 249 #define UFMT_OCTAL {ufmt_int, u_scanf_octal_handler} 250 /* x, X */ 251 #define UFMT_HEX {ufmt_int, u_scanf_hex_handler} 252 /* f */ 253 #define UFMT_DOUBLE {ufmt_double, u_scanf_double_handler} 254 /* e, E */ 255 #define UFMT_SCIENTIFIC {ufmt_double, u_scanf_scientific_handler} 256 /* g, G */ 257 #define UFMT_SCIDBL {ufmt_double, u_scanf_scidbl_handler} 258 /* n */ 259 #define UFMT_COUNT {ufmt_count, u_scanf_count_handler} 260 /* [ */ 261 #define UFMT_SCANSET {ufmt_string, u_scanf_scanset_handler} 262 263 /* non-ANSI extensions */ 264 /* Use US-ASCII characters only for formatting */ 265 266 /* p */ 267 #define UFMT_POINTER {ufmt_pointer, u_scanf_pointer_handler} 268 /* V */ 269 #define UFMT_SPELLOUT {ufmt_double, u_scanf_spellout_handler} 270 /* P */ 271 #define UFMT_PERCENT {ufmt_double, u_scanf_percent_handler} 272 /* C K is old format */ 273 #define UFMT_UCHAR {ufmt_uchar, u_scanf_uchar_handler} 274 /* S U is old format */ 275 #define UFMT_USTRING {ufmt_ustring, u_scanf_ustring_handler} 276 277 278 #define UFMT_EMPTY {ufmt_empty, NULL} 279 280 /** 281 * A u_scanf handler function. 282 * A u_scanf handler is responsible for handling a single u_scanf 283 * format specification, for example 'd' or 's'. 284 * @param stream The UFILE to which to write output. 285 * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing 286 * information on the format specification. 287 * @param args A pointer to the argument data 288 * @param fmt A pointer to the first character in the format string 289 * following the spec. 290 * @param fmtConsumed On output, set to the number of characters consumed 291 * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width. 292 * @param argConverted The number of arguments converted and assigned, or -1 if an 293 * error occurred. 294 * @return The number of code points consumed during reading. 295 */ 296 typedef int32_t (*u_scanf_handler) (UFILE *stream, 297 u_scanf_spec_info *info, 298 ufmt_args *args, 299 const UChar *fmt, 300 int32_t *fmtConsumed, 301 int32_t *argConverted); 302 303 typedef struct u_scanf_info { 304 ufmt_type_info info; 305 u_scanf_handler handler; 306 } u_scanf_info; 307 308 #define USCANF_NUM_FMT_HANDLERS 108 309 #define USCANF_SYMBOL_BUFFER_SIZE 8 310 311 /* We do not use handlers for 0-0x1f */ 312 #define USCANF_BASE_FMT_HANDLERS 0x20 313 314 315 static int32_t 316 u_scanf_skip_leading_ws(UFILE *input, 317 UChar pad) 318 { 319 UChar c; 320 int32_t count = 0; 321 UBool isNotEOF; 322 323 /* skip all leading ws in the input */ 324 while( (isNotEOF = ufile_getch(input, &c)) && (c == pad || u_isWhitespace(c)) ) 325 { 326 count++; 327 } 328 329 /* put the final character back on the input */ 330 if(isNotEOF) 331 u_fungetc(c, input); 332 333 return count; 334 } 335 336 /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */ 337 static int32_t 338 u_scanf_skip_leading_positive_sign(UFILE *input, 339 UNumberFormat *format, 340 UErrorCode *status) 341 { 342 UChar c; 343 int32_t count = 0; 344 UBool isNotEOF; 345 UChar plusSymbol[USCANF_SYMBOL_BUFFER_SIZE]; 346 int32_t symbolLen; 347 UErrorCode localStatus = U_ZERO_ERROR; 348 349 if (U_SUCCESS(*status)) { 350 symbolLen = unum_getSymbol(format, 351 UNUM_PLUS_SIGN_SYMBOL, 352 plusSymbol, 353 sizeof(plusSymbol)/sizeof(*plusSymbol), 354 &localStatus); 355 356 if (U_SUCCESS(localStatus)) { 357 /* skip all leading ws in the input */ 358 while( (isNotEOF = ufile_getch(input, &c)) && (count < symbolLen && c == plusSymbol[count]) ) 359 { 360 count++; 361 } 362 363 /* put the final character back on the input */ 364 if(isNotEOF) { 365 u_fungetc(c, input); 366 } 367 } 368 } 369 370 return count; 371 } 372 373 static int32_t 374 u_scanf_simple_percent_handler(UFILE *input, 375 u_scanf_spec_info *info, 376 ufmt_args *args, 377 const UChar *fmt, 378 int32_t *fmtConsumed, 379 int32_t *argConverted) 380 { 381 /* make sure the next character in the input is a percent */ 382 *argConverted = 0; 383 if(u_fgetc(input) != 0x0025) { 384 *argConverted = -1; 385 } 386 return 1; 387 } 388 389 static int32_t 390 u_scanf_count_handler(UFILE *input, 391 u_scanf_spec_info *info, 392 ufmt_args *args, 393 const UChar *fmt, 394 int32_t *fmtConsumed, 395 int32_t *argConverted) 396 { 397 /* in the special case of count, the u_scanf_spec_info's width */ 398 /* will contain the # of items converted thus far */ 399 if (!info->fSkipArg) { 400 if (info->fIsShort) 401 *(int16_t*)(args[0].ptrValue) = (int16_t)(UINT16_MAX & info->fWidth); 402 else if (info->fIsLongLong) 403 *(int64_t*)(args[0].ptrValue) = info->fWidth; 404 else 405 *(int32_t*)(args[0].ptrValue) = (int32_t)(UINT32_MAX & info->fWidth); 406 } 407 *argConverted = 0; 408 409 /* we converted 0 args */ 410 return 0; 411 } 412 413 static int32_t 414 u_scanf_double_handler(UFILE *input, 415 u_scanf_spec_info *info, 416 ufmt_args *args, 417 const UChar *fmt, 418 int32_t *fmtConsumed, 419 int32_t *argConverted) 420 { 421 int32_t len; 422 double num; 423 UNumberFormat *format; 424 int32_t parsePos = 0; 425 int32_t skipped; 426 UErrorCode status = U_ZERO_ERROR; 427 428 429 /* skip all ws in the input */ 430 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 431 432 /* fill the input's internal buffer */ 433 ufile_fill_uchar_buffer(input); 434 435 /* determine the size of the input's buffer */ 436 len = (int32_t)(input->str.fLimit - input->str.fPos); 437 438 /* truncate to the width, if specified */ 439 if(info->fWidth != -1) 440 len = ufmt_min(len, info->fWidth); 441 442 /* get the formatter */ 443 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 444 445 /* handle error */ 446 if(format == 0) 447 return 0; 448 449 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 450 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 451 452 /* parse the number */ 453 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 454 455 if (!info->fSkipArg) { 456 if (info->fIsLong) 457 *(double*)(args[0].ptrValue) = num; 458 else if (info->fIsLongDouble) 459 *(long double*)(args[0].ptrValue) = num; 460 else 461 *(float*)(args[0].ptrValue) = (float)num; 462 } 463 464 /* mask off any necessary bits */ 465 /* if(! info->fIsLong_double) 466 num &= DBL_MAX;*/ 467 468 /* update the input's position to reflect consumed data */ 469 input->str.fPos += parsePos; 470 471 /* we converted 1 arg */ 472 *argConverted = !info->fSkipArg; 473 return parsePos + skipped; 474 } 475 476 #define UPRINTF_SYMBOL_BUFFER_SIZE 8 477 478 static int32_t 479 u_scanf_scientific_handler(UFILE *input, 480 u_scanf_spec_info *info, 481 ufmt_args *args, 482 const UChar *fmt, 483 int32_t *fmtConsumed, 484 int32_t *argConverted) 485 { 486 int32_t len; 487 double num; 488 UNumberFormat *format; 489 int32_t parsePos = 0; 490 int32_t skipped; 491 UErrorCode status = U_ZERO_ERROR; 492 UChar srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; 493 int32_t srcLen, expLen; 494 UChar expBuf[UPRINTF_SYMBOL_BUFFER_SIZE]; 495 496 497 /* skip all ws in the input */ 498 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 499 500 /* fill the input's internal buffer */ 501 ufile_fill_uchar_buffer(input); 502 503 /* determine the size of the input's buffer */ 504 len = (int32_t)(input->str.fLimit - input->str.fPos); 505 506 /* truncate to the width, if specified */ 507 if(info->fWidth != -1) 508 len = ufmt_min(len, info->fWidth); 509 510 /* get the formatter */ 511 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 512 513 /* handle error */ 514 if(format == 0) 515 return 0; 516 517 /* set the appropriate flags on the formatter */ 518 519 srcLen = unum_getSymbol(format, 520 UNUM_EXPONENTIAL_SYMBOL, 521 srcExpBuf, 522 sizeof(srcExpBuf), 523 &status); 524 525 /* Upper/lower case the e */ 526 if (info->fSpec == (UChar)0x65 /* e */) { 527 expLen = u_strToLower(expBuf, (int32_t)sizeof(expBuf), 528 srcExpBuf, srcLen, 529 input->str.fBundle.fLocale, 530 &status); 531 } 532 else { 533 expLen = u_strToUpper(expBuf, (int32_t)sizeof(expBuf), 534 srcExpBuf, srcLen, 535 input->str.fBundle.fLocale, 536 &status); 537 } 538 539 unum_setSymbol(format, 540 UNUM_EXPONENTIAL_SYMBOL, 541 expBuf, 542 expLen, 543 &status); 544 545 546 547 548 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 549 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 550 551 /* parse the number */ 552 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 553 554 if (!info->fSkipArg) { 555 if (info->fIsLong) 556 *(double*)(args[0].ptrValue) = num; 557 else if (info->fIsLongDouble) 558 *(long double*)(args[0].ptrValue) = num; 559 else 560 *(float*)(args[0].ptrValue) = (float)num; 561 } 562 563 /* mask off any necessary bits */ 564 /* if(! info->fIsLong_double) 565 num &= DBL_MAX;*/ 566 567 /* update the input's position to reflect consumed data */ 568 input->str.fPos += parsePos; 569 570 /* we converted 1 arg */ 571 *argConverted = !info->fSkipArg; 572 return parsePos + skipped; 573 } 574 575 static int32_t 576 u_scanf_scidbl_handler(UFILE *input, 577 u_scanf_spec_info *info, 578 ufmt_args *args, 579 const UChar *fmt, 580 int32_t *fmtConsumed, 581 int32_t *argConverted) 582 { 583 int32_t len; 584 double num; 585 UNumberFormat *scientificFormat, *genericFormat; 586 /*int32_t scientificResult, genericResult;*/ 587 double scientificResult, genericResult; 588 int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0; 589 int32_t skipped; 590 UErrorCode scientificStatus = U_ZERO_ERROR; 591 UErrorCode genericStatus = U_ZERO_ERROR; 592 593 594 /* since we can't determine by scanning the characters whether */ 595 /* a number was formatted in the 'f' or 'g' styles, parse the */ 596 /* string with both formatters, and assume whichever one */ 597 /* parsed the most is the correct formatter to use */ 598 599 600 /* skip all ws in the input */ 601 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 602 603 /* fill the input's internal buffer */ 604 ufile_fill_uchar_buffer(input); 605 606 /* determine the size of the input's buffer */ 607 len = (int32_t)(input->str.fLimit - input->str.fPos); 608 609 /* truncate to the width, if specified */ 610 if(info->fWidth != -1) 611 len = ufmt_min(len, info->fWidth); 612 613 /* get the formatters */ 614 scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC); 615 genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 616 617 /* handle error */ 618 if(scientificFormat == 0 || genericFormat == 0) 619 return 0; 620 621 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 622 skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus); 623 624 /* parse the number using each format*/ 625 626 scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len, 627 &scientificParsePos, &scientificStatus); 628 629 genericResult = unum_parseDouble(genericFormat, input->str.fPos, len, 630 &genericParsePos, &genericStatus); 631 632 /* determine which parse made it farther */ 633 if(scientificParsePos > genericParsePos) { 634 /* stash the result in num */ 635 num = scientificResult; 636 /* update the input's position to reflect consumed data */ 637 parsePos += scientificParsePos; 638 } 639 else { 640 /* stash the result in num */ 641 num = genericResult; 642 /* update the input's position to reflect consumed data */ 643 parsePos += genericParsePos; 644 } 645 input->str.fPos += parsePos; 646 647 if (!info->fSkipArg) { 648 if (info->fIsLong) 649 *(double*)(args[0].ptrValue) = num; 650 else if (info->fIsLongDouble) 651 *(long double*)(args[0].ptrValue) = num; 652 else 653 *(float*)(args[0].ptrValue) = (float)num; 654 } 655 656 /* mask off any necessary bits */ 657 /* if(! info->fIsLong_double) 658 num &= DBL_MAX;*/ 659 660 /* we converted 1 arg */ 661 *argConverted = !info->fSkipArg; 662 return parsePos + skipped; 663 } 664 665 static int32_t 666 u_scanf_integer_handler(UFILE *input, 667 u_scanf_spec_info *info, 668 ufmt_args *args, 669 const UChar *fmt, 670 int32_t *fmtConsumed, 671 int32_t *argConverted) 672 { 673 int32_t len; 674 void *num = (void*) (args[0].ptrValue); 675 UNumberFormat *format; 676 int32_t parsePos = 0; 677 int32_t skipped; 678 UErrorCode status = U_ZERO_ERROR; 679 int64_t result; 680 681 682 /* skip all ws in the input */ 683 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 684 685 /* fill the input's internal buffer */ 686 ufile_fill_uchar_buffer(input); 687 688 /* determine the size of the input's buffer */ 689 len = (int32_t)(input->str.fLimit - input->str.fPos); 690 691 /* truncate to the width, if specified */ 692 if(info->fWidth != -1) 693 len = ufmt_min(len, info->fWidth); 694 695 /* get the formatter */ 696 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL); 697 698 /* handle error */ 699 if(format == 0) 700 return 0; 701 702 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 703 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 704 705 /* parse the number */ 706 result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status); 707 708 /* mask off any necessary bits */ 709 if (!info->fSkipArg) { 710 if (info->fIsShort) 711 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 712 else if (info->fIsLongLong) 713 *(int64_t*)num = result; 714 else 715 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 716 } 717 718 /* update the input's position to reflect consumed data */ 719 input->str.fPos += parsePos; 720 721 /* we converted 1 arg */ 722 *argConverted = !info->fSkipArg; 723 return parsePos + skipped; 724 } 725 726 static int32_t 727 u_scanf_uinteger_handler(UFILE *input, 728 u_scanf_spec_info *info, 729 ufmt_args *args, 730 const UChar *fmt, 731 int32_t *fmtConsumed, 732 int32_t *argConverted) 733 { 734 /* TODO Fix this when Numberformat handles uint64_t */ 735 return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted); 736 } 737 738 static int32_t 739 u_scanf_percent_handler(UFILE *input, 740 u_scanf_spec_info *info, 741 ufmt_args *args, 742 const UChar *fmt, 743 int32_t *fmtConsumed, 744 int32_t *argConverted) 745 { 746 int32_t len; 747 double num; 748 UNumberFormat *format; 749 int32_t parsePos = 0; 750 int32_t skipped; 751 UErrorCode status = U_ZERO_ERROR; 752 753 754 /* skip all ws in the input */ 755 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 756 757 /* fill the input's internal buffer */ 758 ufile_fill_uchar_buffer(input); 759 760 /* determine the size of the input's buffer */ 761 len = (int32_t)(input->str.fLimit - input->str.fPos); 762 763 /* truncate to the width, if specified */ 764 if(info->fWidth != -1) 765 len = ufmt_min(len, info->fWidth); 766 767 /* get the formatter */ 768 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT); 769 770 /* handle error */ 771 if(format == 0) 772 return 0; 773 774 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 775 skipped += u_scanf_skip_leading_positive_sign(input, format, &status); 776 777 /* parse the number */ 778 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 779 780 if (!info->fSkipArg) { 781 *(double*)(args[0].ptrValue) = num; 782 } 783 784 /* mask off any necessary bits */ 785 /* if(! info->fIsLong_double) 786 num &= DBL_MAX;*/ 787 788 /* update the input's position to reflect consumed data */ 789 input->str.fPos += parsePos; 790 791 /* we converted 1 arg */ 792 *argConverted = !info->fSkipArg; 793 return parsePos; 794 } 795 796 static int32_t 797 u_scanf_string_handler(UFILE *input, 798 u_scanf_spec_info *info, 799 ufmt_args *args, 800 const UChar *fmt, 801 int32_t *fmtConsumed, 802 int32_t *argConverted) 803 { 804 const UChar *source; 805 UConverter *conv; 806 char *arg = (char*)(args[0].ptrValue); 807 char *alias = arg; 808 char *limit; 809 UErrorCode status = U_ZERO_ERROR; 810 int32_t count; 811 int32_t skipped = 0; 812 UChar c; 813 UBool isNotEOF = FALSE; 814 815 /* skip all ws in the input */ 816 if (info->fIsString) { 817 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 818 } 819 820 /* get the string one character at a time, truncating to the width */ 821 count = 0; 822 823 /* open the default converter */ 824 conv = u_getDefaultConverter(&status); 825 826 if(U_FAILURE(status)) 827 return -1; 828 829 while( (info->fWidth == -1 || count < info->fWidth) 830 && (isNotEOF = ufile_getch(input, &c)) 831 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 832 { 833 834 if (!info->fSkipArg) { 835 /* put the character from the input onto the target */ 836 source = &c; 837 /* Since we do this one character at a time, do it this way. */ 838 if (info->fWidth > 0) { 839 limit = alias + info->fWidth - count; 840 } 841 else { 842 limit = alias + ucnv_getMaxCharSize(conv); 843 } 844 845 /* convert the character to the default codepage */ 846 ucnv_fromUnicode(conv, &alias, limit, &source, source + 1, 847 NULL, TRUE, &status); 848 849 if(U_FAILURE(status)) { 850 /* clean up */ 851 u_releaseDefaultConverter(conv); 852 return -1; 853 } 854 } 855 856 /* increment the count */ 857 ++count; 858 } 859 860 /* put the final character we read back on the input */ 861 if (!info->fSkipArg) { 862 if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF) 863 u_fungetc(c, input); 864 865 /* add the terminator */ 866 if (info->fIsString) { 867 *alias = 0x00; 868 } 869 } 870 871 /* clean up */ 872 u_releaseDefaultConverter(conv); 873 874 /* we converted 1 arg */ 875 *argConverted = !info->fSkipArg; 876 return count + skipped; 877 } 878 879 static int32_t 880 u_scanf_char_handler(UFILE *input, 881 u_scanf_spec_info *info, 882 ufmt_args *args, 883 const UChar *fmt, 884 int32_t *fmtConsumed, 885 int32_t *argConverted) 886 { 887 if (info->fWidth < 0) { 888 info->fWidth = 1; 889 } 890 info->fIsString = FALSE; 891 return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted); 892 } 893 894 static int32_t 895 u_scanf_ustring_handler(UFILE *input, 896 u_scanf_spec_info *info, 897 ufmt_args *args, 898 const UChar *fmt, 899 int32_t *fmtConsumed, 900 int32_t *argConverted) 901 { 902 UChar *arg = (UChar*)(args[0].ptrValue); 903 UChar *alias = arg; 904 int32_t count; 905 int32_t skipped = 0; 906 UChar c; 907 UBool isNotEOF = FALSE; 908 909 /* skip all ws in the input */ 910 if (info->fIsString) { 911 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 912 } 913 914 /* get the string one character at a time, truncating to the width */ 915 count = 0; 916 917 while( (info->fWidth == -1 || count < info->fWidth) 918 && (isNotEOF = ufile_getch(input, &c)) 919 && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c)))) 920 { 921 922 /* put the character from the input onto the target */ 923 if (!info->fSkipArg) { 924 *alias++ = c; 925 } 926 927 /* increment the count */ 928 ++count; 929 } 930 931 /* put the final character we read back on the input */ 932 if (!info->fSkipArg) { 933 if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) { 934 u_fungetc(c, input); 935 } 936 937 /* add the terminator */ 938 if (info->fIsString) { 939 *alias = 0x0000; 940 } 941 } 942 943 /* we converted 1 arg */ 944 *argConverted = !info->fSkipArg; 945 return count + skipped; 946 } 947 948 static int32_t 949 u_scanf_uchar_handler(UFILE *input, 950 u_scanf_spec_info *info, 951 ufmt_args *args, 952 const UChar *fmt, 953 int32_t *fmtConsumed, 954 int32_t *argConverted) 955 { 956 if (info->fWidth < 0) { 957 info->fWidth = 1; 958 } 959 info->fIsString = FALSE; 960 return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted); 961 } 962 963 static int32_t 964 u_scanf_spellout_handler(UFILE *input, 965 u_scanf_spec_info *info, 966 ufmt_args *args, 967 const UChar *fmt, 968 int32_t *fmtConsumed, 969 int32_t *argConverted) 970 { 971 int32_t len; 972 double num; 973 UNumberFormat *format; 974 int32_t parsePos = 0; 975 int32_t skipped; 976 UErrorCode status = U_ZERO_ERROR; 977 978 979 /* skip all ws in the input */ 980 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 981 982 /* fill the input's internal buffer */ 983 ufile_fill_uchar_buffer(input); 984 985 /* determine the size of the input's buffer */ 986 len = (int32_t)(input->str.fLimit - input->str.fPos); 987 988 /* truncate to the width, if specified */ 989 if(info->fWidth != -1) 990 len = ufmt_min(len, info->fWidth); 991 992 /* get the formatter */ 993 format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT); 994 995 /* handle error */ 996 if(format == 0) 997 return 0; 998 999 /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */ 1000 /* This is not applicable to RBNF. */ 1001 /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/ 1002 1003 /* parse the number */ 1004 num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status); 1005 1006 if (!info->fSkipArg) { 1007 *(double*)(args[0].ptrValue) = num; 1008 } 1009 1010 /* mask off any necessary bits */ 1011 /* if(! info->fIsLong_double) 1012 num &= DBL_MAX;*/ 1013 1014 /* update the input's position to reflect consumed data */ 1015 input->str.fPos += parsePos; 1016 1017 /* we converted 1 arg */ 1018 *argConverted = !info->fSkipArg; 1019 return parsePos + skipped; 1020 } 1021 1022 static int32_t 1023 u_scanf_hex_handler(UFILE *input, 1024 u_scanf_spec_info *info, 1025 ufmt_args *args, 1026 const UChar *fmt, 1027 int32_t *fmtConsumed, 1028 int32_t *argConverted) 1029 { 1030 int32_t len; 1031 int32_t skipped; 1032 void *num = (void*) (args[0].ptrValue); 1033 int64_t result; 1034 1035 /* skip all ws in the input */ 1036 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1037 1038 /* fill the input's internal buffer */ 1039 ufile_fill_uchar_buffer(input); 1040 1041 /* determine the size of the input's buffer */ 1042 len = (int32_t)(input->str.fLimit - input->str.fPos); 1043 1044 /* truncate to the width, if specified */ 1045 if(info->fWidth != -1) 1046 len = ufmt_min(len, info->fWidth); 1047 1048 /* check for alternate form */ 1049 if( *(input->str.fPos) == 0x0030 && 1050 (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) { 1051 1052 /* skip the '0' and 'x' or 'X' if present */ 1053 input->str.fPos += 2; 1054 len -= 2; 1055 } 1056 1057 /* parse the number */ 1058 result = ufmt_uto64(input->str.fPos, &len, 16); 1059 1060 /* update the input's position to reflect consumed data */ 1061 input->str.fPos += len; 1062 1063 /* mask off any necessary bits */ 1064 if (!info->fSkipArg) { 1065 if (info->fIsShort) 1066 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1067 else if (info->fIsLongLong) 1068 *(int64_t*)num = result; 1069 else 1070 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1071 } 1072 1073 /* we converted 1 arg */ 1074 *argConverted = !info->fSkipArg; 1075 return len + skipped; 1076 } 1077 1078 static int32_t 1079 u_scanf_octal_handler(UFILE *input, 1080 u_scanf_spec_info *info, 1081 ufmt_args *args, 1082 const UChar *fmt, 1083 int32_t *fmtConsumed, 1084 int32_t *argConverted) 1085 { 1086 int32_t len; 1087 int32_t skipped; 1088 void *num = (void*) (args[0].ptrValue); 1089 int64_t result; 1090 1091 /* skip all ws in the input */ 1092 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1093 1094 /* fill the input's internal buffer */ 1095 ufile_fill_uchar_buffer(input); 1096 1097 /* determine the size of the input's buffer */ 1098 len = (int32_t)(input->str.fLimit - input->str.fPos); 1099 1100 /* truncate to the width, if specified */ 1101 if(info->fWidth != -1) 1102 len = ufmt_min(len, info->fWidth); 1103 1104 /* parse the number */ 1105 result = ufmt_uto64(input->str.fPos, &len, 8); 1106 1107 /* update the input's position to reflect consumed data */ 1108 input->str.fPos += len; 1109 1110 /* mask off any necessary bits */ 1111 if (!info->fSkipArg) { 1112 if (info->fIsShort) 1113 *(int16_t*)num = (int16_t)(UINT16_MAX & result); 1114 else if (info->fIsLongLong) 1115 *(int64_t*)num = result; 1116 else 1117 *(int32_t*)num = (int32_t)(UINT32_MAX & result); 1118 } 1119 1120 /* we converted 1 arg */ 1121 *argConverted = !info->fSkipArg; 1122 return len + skipped; 1123 } 1124 1125 static int32_t 1126 u_scanf_pointer_handler(UFILE *input, 1127 u_scanf_spec_info *info, 1128 ufmt_args *args, 1129 const UChar *fmt, 1130 int32_t *fmtConsumed, 1131 int32_t *argConverted) 1132 { 1133 int32_t len; 1134 int32_t skipped; 1135 void *result; 1136 void **p = (void**)(args[0].ptrValue); 1137 1138 1139 /* skip all ws in the input */ 1140 skipped = u_scanf_skip_leading_ws(input, info->fPadChar); 1141 1142 /* fill the input's internal buffer */ 1143 ufile_fill_uchar_buffer(input); 1144 1145 /* determine the size of the input's buffer */ 1146 len = (int32_t)(input->str.fLimit - input->str.fPos); 1147 1148 /* truncate to the width, if specified */ 1149 if(info->fWidth != -1) { 1150 len = ufmt_min(len, info->fWidth); 1151 } 1152 1153 /* Make sure that we don't consume too much */ 1154 if (len > (int32_t)(sizeof(void*)*2)) { 1155 len = (int32_t)(sizeof(void*)*2); 1156 } 1157 1158 /* parse the pointer - assign to temporary value */ 1159 result = ufmt_utop(input->str.fPos, &len); 1160 1161 if (!info->fSkipArg) { 1162 *p = result; 1163 } 1164 1165 /* update the input's position to reflect consumed data */ 1166 input->str.fPos += len; 1167 1168 /* we converted 1 arg */ 1169 *argConverted = !info->fSkipArg; 1170 return len + skipped; 1171 } 1172 1173 static int32_t 1174 u_scanf_scanset_handler(UFILE *input, 1175 u_scanf_spec_info *info, 1176 ufmt_args *args, 1177 const UChar *fmt, 1178 int32_t *fmtConsumed, 1179 int32_t *argConverted) 1180 { 1181 USet *scanset; 1182 UErrorCode status = U_ZERO_ERROR; 1183 int32_t chLeft = INT32_MAX; 1184 UChar32 c; 1185 UChar *alias = (UChar*) (args[0].ptrValue); 1186 UBool isNotEOF = FALSE; 1187 UBool readCharacter = FALSE; 1188 1189 /* Create an empty set */ 1190 scanset = uset_open(0, -1); 1191 1192 /* Back up one to get the [ */ 1193 fmt--; 1194 1195 /* truncate to the width, if specified and alias the target */ 1196 if(info->fWidth >= 0) { 1197 chLeft = info->fWidth; 1198 } 1199 1200 /* parse the scanset from the fmt string */ 1201 *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status); 1202 1203 /* verify that the parse was successful */ 1204 if (U_SUCCESS(status)) { 1205 c=0; 1206 1207 /* grab characters one at a time and make sure they are in the scanset */ 1208 while(chLeft > 0) { 1209 if ((isNotEOF = ufile_getch32(input, &c)) && uset_contains(scanset, c)) { 1210 readCharacter = TRUE; 1211 if (!info->fSkipArg) { 1212 int32_t idx = 0; 1213 UBool isError = FALSE; 1214 1215 U16_APPEND(alias, idx, chLeft, c, isError); 1216 if (isError) { 1217 break; 1218 } 1219 alias += idx; 1220 } 1221 chLeft -= (1 + U_IS_SUPPLEMENTARY(c)); 1222 } 1223 else { 1224 /* if the character's not in the scanset, break out */ 1225 break; 1226 } 1227 } 1228 1229 /* put the final character we read back on the input */ 1230 if(isNotEOF && chLeft > 0) { 1231 u_fungetc(c, input); 1232 } 1233 } 1234 1235 uset_close(scanset); 1236 1237 /* if we didn't match at least 1 character, fail */ 1238 if(!readCharacter) 1239 return -1; 1240 /* otherwise, add the terminator */ 1241 else if (!info->fSkipArg) { 1242 *alias = 0x00; 1243 } 1244 1245 /* we converted 1 arg */ 1246 *argConverted = !info->fSkipArg; 1247 return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft; 1248 } 1249 1250 /* Use US-ASCII characters only for formatting. Most codepages have 1251 characters 20-7F from Unicode. Using any other codepage specific 1252 characters will make it very difficult to format the string on 1253 non-Unicode machines */ 1254 static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = { 1255 /* 0x20 */ 1256 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1257 UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY, 1258 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1259 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1260 1261 /* 0x30 */ 1262 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1263 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1264 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1265 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1266 1267 /* 0x40 */ 1268 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR, 1269 UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL, 1270 #ifdef U_USE_OBSOLETE_IO_FORMATTING 1271 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/, 1272 #else 1273 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1274 #endif 1275 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1276 1277 /* 0x50 */ 1278 UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING, 1279 #ifdef U_USE_OBSOLETE_IO_FORMATTING 1280 UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY, 1281 #else 1282 UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY, 1283 #endif 1284 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET, 1285 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1286 1287 /* 0x60 */ 1288 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR, 1289 UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL, 1290 UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY, 1291 UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL, 1292 1293 /* 0x70 */ 1294 UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING, 1295 UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY, 1296 UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1297 UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, 1298 }; 1299 1300 U_CFUNC int32_t 1301 u_scanf_parse(UFILE *f, 1302 const UChar *patternSpecification, 1303 va_list ap) 1304 { 1305 const UChar *alias; 1306 int32_t count, converted, argConsumed, cpConsumed; 1307 uint16_t handlerNum; 1308 1309 ufmt_args args; 1310 u_scanf_spec spec; 1311 ufmt_type_info info; 1312 u_scanf_handler handler; 1313 1314 /* alias the pattern */ 1315 alias = patternSpecification; 1316 1317 /* haven't converted anything yet */ 1318 argConsumed = 0; 1319 converted = 0; 1320 cpConsumed = 0; 1321 1322 /* iterate through the pattern */ 1323 for(;;) { 1324 1325 /* match any characters up to the next '%' */ 1326 while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) { 1327 alias++; 1328 } 1329 1330 /* if we aren't at a '%', or if we're at end of string, break*/ 1331 if(*alias != UP_PERCENT || *alias == 0x0000) 1332 break; 1333 1334 /* parse the specifier */ 1335 count = u_scanf_parse_spec(alias, &spec); 1336 1337 /* update the pointer in pattern */ 1338 alias += count; 1339 1340 handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS); 1341 if (handlerNum < USCANF_NUM_FMT_HANDLERS) { 1342 /* skip the argument, if necessary */ 1343 /* query the info function for argument information */ 1344 info = g_u_scanf_infos[ handlerNum ].info; 1345 if (info != ufmt_count && u_feof(f)) { 1346 break; 1347 } 1348 else if(spec.fInfo.fSkipArg) { 1349 args.ptrValue = NULL; 1350 } 1351 else { 1352 switch(info) { 1353 case ufmt_count: 1354 /* set the spec's width to the # of items converted */ 1355 spec.fInfo.fWidth = cpConsumed; 1356 /* fall through to next case */ 1357 case ufmt_char: 1358 case ufmt_uchar: 1359 case ufmt_int: 1360 case ufmt_string: 1361 case ufmt_ustring: 1362 case ufmt_pointer: 1363 case ufmt_float: 1364 case ufmt_double: 1365 args.ptrValue = va_arg(ap, void*); 1366 break; 1367 1368 default: 1369 /* else args is ignored */ 1370 args.ptrValue = NULL; 1371 break; 1372 } 1373 } 1374 1375 /* call the handler function */ 1376 handler = g_u_scanf_infos[ handlerNum ].handler; 1377 if(handler != 0) { 1378 1379 /* reset count to 1 so that += for alias works. */ 1380 count = 1; 1381 1382 cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed); 1383 1384 /* if the handler encountered an error condition, break */ 1385 if(argConsumed < 0) { 1386 converted = -1; 1387 break; 1388 } 1389 1390 /* add to the # of items converted */ 1391 converted += argConsumed; 1392 1393 /* update the pointer in pattern */ 1394 alias += count-1; 1395 } 1396 /* else do nothing */ 1397 } 1398 /* else do nothing */ 1399 1400 /* just ignore unknown tags */ 1401 } 1402 1403 /* return # of items converted */ 1404 return converted; 1405 } 1406 1407 #endif /* #if !UCONFIG_NO_FORMATTING */ 1408