1 /***************************************************************************** 2 * 3 * Copyright (C) 1999-2009, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 * 6 ******************************************************************************/ 7 8 /* 9 * uconv(1): an iconv(1)-like converter using ICU. 10 * 11 * Original code by Jonas Utterström <jonas.utterstrom (at) vittran.norrnod.se> 12 * contributed in 1999. 13 * 14 * Conversion to the C conversion API and many improvements by 15 * Yves Arrouye <yves (at) realnames.com>, current maintainer. 16 * 17 * Markus Scherer maintainer from 2003. 18 * See source code repository history for changes. 19 */ 20 21 #include <unicode/utypes.h> 22 #include <unicode/putil.h> 23 #include <unicode/ucnv.h> 24 #include <unicode/uenum.h> 25 #include <unicode/unistr.h> 26 #include <unicode/translit.h> 27 #include <unicode/uset.h> 28 #include <unicode/uclean.h> 29 30 #include <stdio.h> 31 #include <errno.h> 32 #include <string.h> 33 #include <stdlib.h> 34 35 #include "cmemory.h" 36 #include "cstring.h" 37 #include "ustrfmt.h" 38 39 #include "unicode/uwmsg.h" 40 41 U_NAMESPACE_USE 42 43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__) 44 #include <io.h> 45 #include <fcntl.h> 46 #if defined(U_WINDOWS) 47 #define USE_FILENO_BINARY_MODE 1 48 /* Windows likes to rename Unix-like functions */ 49 #ifndef fileno 50 #define fileno _fileno 51 #endif 52 #ifndef setmode 53 #define setmode _setmode 54 #endif 55 #ifndef O_BINARY 56 #define O_BINARY _O_BINARY 57 #endif 58 #endif 59 #endif 60 61 #ifdef UCONVMSG_LINK 62 /* below from the README */ 63 #include "unicode/utypes.h" 64 #include "unicode/udata.h" 65 U_CFUNC char uconvmsg_dat[]; 66 #endif 67 68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 69 70 #define DEFAULT_BUFSZ 4096 71 #define UCONVMSG "uconvmsg" 72 73 static UResourceBundle *gBundle = 0; /* Bundle containing messages. */ 74 75 /* 76 * Initialize the message bundle so that message strings can be fetched 77 * by u_wmsg(). 78 * 79 */ 80 81 static void initMsg(const char *pname) { 82 static int ps = 0; 83 84 if (!ps) { 85 char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. */ 86 UErrorCode err = U_ZERO_ERROR; 87 88 ps = 1; 89 90 /* Set up our static data - if any */ 91 #ifdef UCONVMSG_LINK 92 udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err); 93 if (U_FAILURE(err)) { 94 fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n", 95 pname, u_errorName(err)); 96 err = U_ZERO_ERROR; /* It may still fail */ 97 } 98 #endif 99 100 /* Get messages. */ 101 gBundle = u_wmsg_setPath(UCONVMSG, &err); 102 if (U_FAILURE(err)) { 103 fprintf(stderr, 104 "%s: warning: couldn't open bundle %s: %s\n", 105 pname, UCONVMSG, u_errorName(err)); 106 #ifdef UCONVMSG_LINK 107 fprintf(stderr, 108 "%s: setAppData was called, internal data %s failed to load\n", 109 pname, UCONVMSG); 110 #endif 111 112 err = U_ZERO_ERROR; 113 /* that was try #1, try again with a path */ 114 uprv_strcpy(dataPath, u_getDataDirectory()); 115 uprv_strcat(dataPath, U_FILE_SEP_STRING); 116 uprv_strcat(dataPath, UCONVMSG); 117 118 gBundle = u_wmsg_setPath(dataPath, &err); 119 if (U_FAILURE(err)) { 120 fprintf(stderr, 121 "%s: warning: still couldn't open bundle %s: %s\n", 122 pname, dataPath, u_errorName(err)); 123 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname); 124 } 125 } 126 } 127 } 128 129 /* Mapping of callback names to the callbacks passed to the converter 130 API. */ 131 132 static struct callback_ent { 133 const char *name; 134 UConverterFromUCallback fromu; 135 const void *fromuctxt; 136 UConverterToUCallback tou; 137 const void *touctxt; 138 } transcode_callbacks[] = { 139 { "substitute", 140 UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 141 UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 }, 142 { "skip", 143 UCNV_FROM_U_CALLBACK_SKIP, 0, 144 UCNV_TO_U_CALLBACK_SKIP, 0 }, 145 { "stop", 146 UCNV_FROM_U_CALLBACK_STOP, 0, 147 UCNV_TO_U_CALLBACK_STOP, 0 }, 148 { "escape", 149 UCNV_FROM_U_CALLBACK_ESCAPE, 0, 150 UCNV_TO_U_CALLBACK_ESCAPE, 0}, 151 { "escape-icu", 152 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, 153 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU }, 154 { "escape-java", 155 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, 156 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA }, 157 { "escape-c", 158 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 159 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C }, 160 { "escape-xml", 161 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, 162 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, 163 { "escape-xml-hex", 164 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, 165 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, 166 { "escape-xml-dec", 167 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 168 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC }, 169 { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, 170 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE } 171 }; 172 173 /* Return a pointer to a callback record given its name. */ 174 175 static const struct callback_ent *findCallback(const char *name) { 176 int i, count = 177 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks); 178 179 /* We'll do a linear search, there aren't many of them and bsearch() 180 may not be that portable. */ 181 182 for (i = 0; i < count; ++i) { 183 if (!uprv_stricmp(name, transcode_callbacks[i].name)) { 184 return &transcode_callbacks[i]; 185 } 186 } 187 188 return 0; 189 } 190 191 /* Print converter information. If lookfor is set, only that converter will 192 be printed, otherwise all converters will be printed. If canon is non 193 zero, tags and aliases for each converter are printed too, in the format 194 expected for convrters.txt(5). */ 195 196 static int printConverters(const char *pname, const char *lookfor, 197 UBool canon) 198 { 199 UErrorCode err = U_ZERO_ERROR; 200 int32_t num; 201 uint16_t num_stds; 202 const char **stds; 203 204 /* If there is a specified name, just handle that now. */ 205 206 if (lookfor) { 207 if (!canon) { 208 printf("%s\n", lookfor); 209 return 0; 210 } else { 211 /* Because we are printing a canonical name, we need the 212 true converter name. We've done that already except for 213 the default name (because we want to print the exact 214 name one would get when calling ucnv_getDefaultName() 215 in non-canon mode). But since we do not know at this 216 point if we have the default name or something else, we 217 need to normalize again to the canonical converter 218 name. */ 219 220 const char *truename = ucnv_getAlias(lookfor, 0, &err); 221 if (U_SUCCESS(err)) { 222 lookfor = truename; 223 } else { 224 err = U_ZERO_ERROR; 225 } 226 } 227 } 228 229 /* Print converter names. We come here for one of two reasons: we 230 are printing all the names (lookfor was null), or we have a 231 single converter to print but in canon mode, hence we need to 232 get to it in order to print everything. */ 233 234 num = ucnv_countAvailable(); 235 if (num <= 0) { 236 initMsg(pname); 237 u_wmsg(stderr, "cantGetNames"); 238 return -1; 239 } 240 if (lookfor) { 241 num = 1; /* We know where we want to be. */ 242 } 243 244 num_stds = ucnv_countStandards(); 245 stds = (const char **) uprv_malloc(num_stds * sizeof(*stds)); 246 if (!stds) { 247 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR)); 248 return -1; 249 } else { 250 uint16_t s; 251 252 if (canon) { 253 printf("{ "); 254 } 255 for (s = 0; s < num_stds; ++s) { 256 stds[s] = ucnv_getStandard(s, &err); 257 if (canon) { 258 printf("%s ", stds[s]); 259 } 260 if (U_FAILURE(err)) { 261 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err)); 262 goto error_cleanup; 263 } 264 } 265 if (canon) { 266 puts("}"); 267 } 268 } 269 270 for (int32_t i = 0; i < num; i++) { 271 const char *name; 272 uint16_t num_aliases; 273 274 /* Set the name either to what we are looking for, or 275 to the current converter name. */ 276 277 if (lookfor) { 278 name = lookfor; 279 } else { 280 name = ucnv_getAvailableName(i); 281 } 282 283 /* Get all the aliases associated to the name. */ 284 285 err = U_ZERO_ERROR; 286 num_aliases = ucnv_countAliases(name, &err); 287 if (U_FAILURE(err)) { 288 printf("%s", name); 289 290 UnicodeString str(name, ""); 291 putchar('\t'); 292 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), 293 u_wmsg_errorName(err)); 294 goto error_cleanup; 295 } else { 296 uint16_t a, s, t; 297 298 /* Write all the aliases and their tags. */ 299 300 for (a = 0; a < num_aliases; ++a) { 301 const char *alias = ucnv_getAlias(name, a, &err); 302 303 if (U_FAILURE(err)) { 304 UnicodeString str(name, ""); 305 putchar('\t'); 306 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), 307 u_wmsg_errorName(err)); 308 goto error_cleanup; 309 } 310 311 /* Print the current alias so that it looks right. */ 312 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") , 313 alias, 314 (canon ? "" : " ")); 315 316 /* Look (slowly, linear searching) for a tag. */ 317 318 if (canon) { 319 /* -1 to skip the last standard */ 320 for (s = t = 0; s < num_stds-1; ++s) { 321 UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err); 322 if (U_SUCCESS(err)) { 323 /* List the standard tags */ 324 const char *standardName; 325 UBool isFirst = TRUE; 326 UErrorCode enumError = U_ZERO_ERROR; 327 while ((standardName = uenum_next(nameEnum, NULL, &enumError))) { 328 /* See if this alias is supported by this standard. */ 329 if (!strcmp(standardName, alias)) { 330 if (!t) { 331 printf(" {"); 332 t = 1; 333 } 334 /* Print a * after the default standard name */ 335 printf(" %s%s", stds[s], (isFirst ? "*" : "")); 336 } 337 isFirst = FALSE; 338 } 339 } 340 } 341 if (t) { 342 printf(" }"); 343 } 344 } 345 /* Terminate this entry. */ 346 if (canon) { 347 puts(""); 348 } 349 350 /* Move on. */ 351 } 352 /* Terminate this entry. */ 353 if (!canon) { 354 puts(""); 355 } 356 } 357 } 358 359 /* Free temporary data. */ 360 361 uprv_free(stds); 362 363 /* Success. */ 364 365 return 0; 366 error_cleanup: 367 uprv_free(stds); 368 return -1; 369 } 370 371 /* Print all available transliterators. If canon is non zero, print 372 one transliterator per line. */ 373 374 static int printTransliterators(UBool canon) 375 { 376 #if UCONFIG_NO_TRANSLITERATION 377 printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n"); 378 return 1; 379 #else 380 int32_t numtrans = utrans_countAvailableIDs(), i; 381 int buflen = 512; 382 char *buf = (char *) uprv_malloc(buflen); 383 char staticbuf[512]; 384 385 char sepchar = canon ? '\n' : ' '; 386 387 if (!buf) { 388 buf = staticbuf; 389 buflen = sizeof(staticbuf); 390 } 391 392 for (i = 0; i < numtrans; ++i) { 393 int32_t len = utrans_getAvailableID(i, buf, buflen); 394 if (len >= buflen - 1) { 395 if (buf != staticbuf) { 396 buflen <<= 1; 397 if (buflen < len) { 398 buflen = len + 64; 399 } 400 buf = (char *) uprv_realloc(buf, buflen); 401 if (!buf) { 402 buf = staticbuf; 403 buflen = sizeof(staticbuf); 404 } 405 } 406 utrans_getAvailableID(i, buf, buflen); 407 if (len >= buflen) { 408 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */ 409 } 410 } 411 412 printf("%s", buf); 413 if (i < numtrans - 1) { 414 putchar(sepchar); 415 } 416 } 417 418 /* Add a terminating newline if needed. */ 419 420 if (sepchar != '\n') { 421 putchar('\n'); 422 } 423 424 /* Free temporary data. */ 425 426 if (buf != staticbuf) { 427 uprv_free(buf); 428 } 429 430 /* Success. */ 431 432 return 0; 433 #endif 434 } 435 436 enum { 437 uSP = 0x20, // space 438 uCR = 0xd, // carriage return 439 uLF = 0xa, // line feed 440 uNL = 0x85, // newline 441 uLS = 0x2028, // line separator 442 uPS = 0x2029, // paragraph separator 443 uSig = 0xfeff // signature/BOM character 444 }; 445 446 static inline int32_t 447 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) { 448 // find one of 449 // CR, LF, CRLF, NL, LS, PS 450 // for paragraph ends (see UAX #13/Unicode 4) 451 // and include it in the chunk 452 // all of these characters are on the BMP 453 // do not include FF or VT in case they are part of a paragraph 454 // (important for bidi contexts) 455 static const UChar paraEnds[] = { 456 0xd, 0xa, 0x85, 0x2028, 0x2029 457 }; 458 enum { 459 iCR, iLF, iNL, iLS, iPS, iCount 460 }; 461 462 // first, see if there is a CRLF split between prev and s 463 if (prev.endsWith(paraEnds + iCR, 1)) { 464 if (s.startsWith(paraEnds + iLF, 1)) { 465 return 1; // split CRLF, include the LF 466 } else if (!s.isEmpty()) { 467 return 0; // complete the last chunk 468 } else { 469 return -1; // wait for actual further contents to arrive 470 } 471 } 472 473 const UChar *u = s.getBuffer(), *limit = u + s.length(); 474 UChar c; 475 476 while (u < limit) { 477 c = *u++; 478 if ( 479 ((c < uSP) && (c == uCR || c == uLF)) || 480 (c == uNL) || 481 ((c & uLS) == uLS) 482 ) { 483 if (c == uCR) { 484 // check for CRLF 485 if (u == limit) { 486 return -1; // LF may be in the next chunk 487 } else if (*u == uLF) { 488 ++u; // include the LF in this chunk 489 } 490 } 491 return (int32_t)(u - s.getBuffer()); 492 } 493 } 494 495 return -1; // continue collecting the chunk 496 } 497 498 enum { 499 CNV_NO_FEFF, // cannot convert the U+FEFF Unicode signature character (BOM) 500 CNV_WITH_FEFF, // can convert the U+FEFF signature character 501 CNV_ADDS_FEFF // automatically adds/detects the U+FEFF signature character 502 }; 503 504 static inline UChar 505 nibbleToHex(uint8_t n) { 506 n &= 0xf; 507 return 508 n <= 9 ? 509 (UChar)(0x30 + n) : 510 (UChar)((0x61 - 10) + n); 511 } 512 513 // check the converter's Unicode signature properties; 514 // the fromUnicode side of the converter must be in its initial state 515 // and will be reset again if it was used 516 static int32_t 517 cnvSigType(UConverter *cnv) { 518 UErrorCode err; 519 int32_t result; 520 521 // test if the output charset can convert U+FEFF 522 USet *set = uset_open(1, 0); 523 err = U_ZERO_ERROR; 524 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err); 525 if (U_SUCCESS(err) && uset_contains(set, uSig)) { 526 result = CNV_WITH_FEFF; 527 } else { 528 result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted 529 } 530 uset_close(set); 531 532 if (result == CNV_WITH_FEFF) { 533 // test if the output charset emits a signature anyway 534 const UChar a[1] = { 0x61 }; // "a" 535 const UChar *in; 536 537 char buffer[20]; 538 char *out; 539 540 in = a; 541 out = buffer; 542 err = U_ZERO_ERROR; 543 ucnv_fromUnicode(cnv, 544 &out, buffer + sizeof(buffer), 545 &in, a + 1, 546 NULL, TRUE, &err); 547 ucnv_resetFromUnicode(cnv); 548 549 if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) && 550 U_SUCCESS(err) 551 ) { 552 result = CNV_ADDS_FEFF; 553 } 554 } 555 556 return result; 557 } 558 559 class ConvertFile { 560 public: 561 ConvertFile() : 562 buf(NULL), outbuf(NULL), fromoffsets(NULL), 563 bufsz(0), signature(0) {} 564 565 void 566 setBufferSize(size_t bufferSize) { 567 bufsz = bufferSize; 568 569 buf = new char[2 * bufsz]; 570 outbuf = buf + bufsz; 571 572 // +1 for an added U+FEFF in the intermediate Unicode buffer 573 fromoffsets = new int32_t[bufsz + 1]; 574 } 575 576 ~ConvertFile() { 577 delete [] buf; 578 delete [] fromoffsets; 579 } 580 581 UBool convertFile(const char *pname, 582 const char *fromcpage, 583 UConverterToUCallback toucallback, 584 const void *touctxt, 585 const char *tocpage, 586 UConverterFromUCallback fromucallback, 587 const void *fromuctxt, 588 UBool fallback, 589 const char *translit, 590 const char *infilestr, 591 FILE * outfile, int verbose); 592 private: 593 friend int main(int argc, char **argv); 594 595 char *buf, *outbuf; 596 int32_t *fromoffsets; 597 598 size_t bufsz; 599 int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character 600 }; 601 602 // Convert a file from one encoding to another 603 UBool 604 ConvertFile::convertFile(const char *pname, 605 const char *fromcpage, 606 UConverterToUCallback toucallback, 607 const void *touctxt, 608 const char *tocpage, 609 UConverterFromUCallback fromucallback, 610 const void *fromuctxt, 611 UBool fallback, 612 const char *translit, 613 const char *infilestr, 614 FILE * outfile, int verbose) 615 { 616 FILE *infile; 617 UBool ret = TRUE; 618 UConverter *convfrom = 0; 619 UConverter *convto = 0; 620 UErrorCode err = U_ZERO_ERROR; 621 UBool flush; 622 const char *cbufp, *prevbufp; 623 char *bufp; 624 625 uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */ 626 627 const UChar *unibuf, *unibufbp; 628 UChar *unibufp; 629 630 size_t rd, wr; 631 632 #if !UCONFIG_NO_TRANSLITERATION 633 Transliterator *t = 0; // Transliterator acting on Unicode data. 634 UnicodeString chunk; // One chunk of the text being collected for transformation. 635 #endif 636 UnicodeString u; // String to do the transliteration. 637 int32_t ulen; 638 639 // use conversion offsets for error messages 640 // unless a transliterator is used - 641 // a text transformation will reorder characters in unpredictable ways 642 UBool useOffsets = TRUE; 643 644 // Open the correct input file or connect to stdin for reading input 645 646 if (infilestr != 0 && strcmp(infilestr, "-")) { 647 infile = fopen(infilestr, "rb"); 648 if (infile == 0) { 649 UnicodeString str1(infilestr, ""); 650 str1.append((UChar32) 0); 651 UnicodeString str2(strerror(errno), ""); 652 str2.append((UChar32) 0); 653 initMsg(pname); 654 u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); 655 return FALSE; 656 } 657 } else { 658 infilestr = "-"; 659 infile = stdin; 660 #ifdef USE_FILENO_BINARY_MODE 661 if (setmode(fileno(stdin), O_BINARY) == -1) { 662 initMsg(pname); 663 u_wmsg(stderr, "cantSetInBinMode"); 664 return FALSE; 665 } 666 #endif 667 } 668 669 if (verbose) { 670 fprintf(stderr, "%s:\n", infilestr); 671 } 672 673 #if !UCONFIG_NO_TRANSLITERATION 674 // Create transliterator as needed. 675 676 if (translit != NULL && *translit) { 677 UParseError parse; 678 UnicodeString str(translit), pestr; 679 680 /* Create from rules or by ID as needed. */ 681 682 parse.line = -1; 683 684 if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) { 685 t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err); 686 } else { 687 t = Transliterator::createInstance(translit, UTRANS_FORWARD, err); 688 } 689 690 if (U_FAILURE(err)) { 691 str.append((UChar32) 0); 692 initMsg(pname); 693 694 if (parse.line >= 0) { 695 UChar linebuf[20], offsetbuf[20]; 696 uprv_itou(linebuf, 20, parse.line, 10, 0); 697 uprv_itou(offsetbuf, 20, parse.offset, 10, 0); 698 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(), 699 u_wmsg_errorName(err), linebuf, offsetbuf); 700 } else { 701 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(), 702 u_wmsg_errorName(err)); 703 } 704 705 if (t) { 706 delete t; 707 t = 0; 708 } 709 goto error_exit; 710 } 711 712 useOffsets = FALSE; 713 } 714 #endif 715 716 // Create codepage converter. If the codepage or its aliases weren't 717 // available, it returns NULL and a failure code. We also set the 718 // callbacks, and return errors in the same way. 719 720 convfrom = ucnv_open(fromcpage, &err); 721 if (U_FAILURE(err)) { 722 UnicodeString str(fromcpage, ""); 723 initMsg(pname); 724 u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(), 725 u_wmsg_errorName(err)); 726 goto error_exit; 727 } 728 ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err); 729 if (U_FAILURE(err)) { 730 initMsg(pname); 731 u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); 732 goto error_exit; 733 } 734 735 convto = ucnv_open(tocpage, &err); 736 if (U_FAILURE(err)) { 737 UnicodeString str(tocpage, ""); 738 initMsg(pname); 739 u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(), 740 u_wmsg_errorName(err)); 741 goto error_exit; 742 } 743 ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err); 744 if (U_FAILURE(err)) { 745 initMsg(pname); 746 u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); 747 goto error_exit; 748 } 749 ucnv_setFallback(convto, fallback); 750 751 UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode; 752 int8_t sig; 753 754 // OK, we can convert now. 755 sig = signature; 756 rd = 0; 757 758 do { 759 willexit = FALSE; 760 761 // input file offset at the beginning of the next buffer 762 infoffset += rd; 763 764 rd = fread(buf, 1, bufsz, infile); 765 if (ferror(infile) != 0) { 766 UnicodeString str(strerror(errno)); 767 initMsg(pname); 768 u_wmsg(stderr, "cantRead", str.getTerminatedBuffer()); 769 goto error_exit; 770 } 771 772 // Convert the read buffer into the new encoding via Unicode. 773 // After the call 'unibufp' will be placed behind the last 774 // character that was converted in the 'unibuf'. 775 // Also the 'cbufp' is positioned behind the last converted 776 // character. 777 // At the last conversion in the file, flush should be set to 778 // true so that we get all characters converted. 779 // 780 // The converter must be flushed at the end of conversion so 781 // that characters on hold also will be written. 782 783 cbufp = buf; 784 flush = (UBool)(rd != bufsz); 785 786 // convert until the input is consumed 787 do { 788 // remember the start of the current byte-to-Unicode conversion 789 prevbufp = cbufp; 790 791 unibuf = unibufp = u.getBuffer((int32_t)bufsz); 792 793 // Use bufsz instead of u.getCapacity() for the targetLimit 794 // so that we don't overflow fromoffsets[]. 795 ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp, 796 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err); 797 798 ulen = (int32_t)(unibufp - unibuf); 799 u.releaseBuffer(U_SUCCESS(err) ? ulen : 0); 800 801 // fromSawEndOfBytes indicates that ucnv_toUnicode() is done 802 // converting all of the input bytes. 803 // It works like this because ucnv_toUnicode() returns only under the 804 // following conditions: 805 // - an error occurred during conversion (an error code is set) 806 // - the target buffer is filled (the error code indicates an overflow) 807 // - the source is consumed 808 // That is, if the error code does not indicate a failure, 809 // not even an overflow, then the source must be consumed entirely. 810 fromSawEndOfBytes = (UBool)U_SUCCESS(err); 811 812 if (err == U_BUFFER_OVERFLOW_ERROR) { 813 err = U_ZERO_ERROR; 814 } else if (U_FAILURE(err)) { 815 char pos[32], errorBytes[32]; 816 int8_t i, length, errorLength; 817 818 UErrorCode localError = U_ZERO_ERROR; 819 errorLength = (int8_t)sizeof(errorBytes); 820 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError); 821 if (U_FAILURE(localError) || errorLength == 0) { 822 errorLength = 1; 823 } 824 825 // print the input file offset of the start of the error bytes: 826 // input file offset of the current byte buffer + 827 // length of the just consumed bytes - 828 // length of the error bytes 829 length = 830 (int8_t)sprintf(pos, "%d", 831 (int)(infoffset + (cbufp - buf) - errorLength)); 832 833 // output the bytes that caused the error 834 UnicodeString str; 835 for (i = 0; i < errorLength; ++i) { 836 if (i > 0) { 837 str.append((UChar)uSP); 838 } 839 str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4)); 840 str.append(nibbleToHex((uint8_t)errorBytes[i])); 841 } 842 843 initMsg(pname); 844 u_wmsg(stderr, "problemCvtToU", 845 UnicodeString(pos, length, "").getTerminatedBuffer(), 846 str.getTerminatedBuffer(), 847 u_wmsg_errorName(err)); 848 849 willexit = TRUE; 850 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ 851 } 852 853 // Replaced a check for whether the input was consumed by 854 // looping until it is; message key "premEndInput" now obsolete. 855 856 if (ulen == 0) { 857 continue; 858 } 859 860 // remove a U+FEFF Unicode signature character if requested 861 if (sig < 0) { 862 if (u.charAt(0) == uSig) { 863 u.remove(0, 1); 864 865 // account for the removed UChar and offset 866 --ulen; 867 868 if (useOffsets) { 869 // remove an offset from fromoffsets[] as well 870 // to keep the array parallel with the UChars 871 memmove(fromoffsets, fromoffsets + 1, ulen * 4); 872 } 873 874 } 875 sig = 0; 876 } 877 878 #if !UCONFIG_NO_TRANSLITERATION 879 // Transliterate/transform if needed. 880 881 // For transformation, we use chunking code - 882 // collect Unicode input until, for example, an end-of-line, 883 // then transform and output-convert that and continue collecting. 884 // This makes the transformation result independent of the buffer size 885 // while avoiding the slower keyboard mode. 886 // The end-of-chunk characters are completely included in the 887 // transformed string in case they are to be transformed themselves. 888 if (t != NULL) { 889 UnicodeString out; 890 int32_t chunkLimit; 891 892 do { 893 chunkLimit = getChunkLimit(chunk, u); 894 if (chunkLimit < 0 && flush && fromSawEndOfBytes) { 895 // use all of the rest at the end of the text 896 chunkLimit = u.length(); 897 } 898 if (chunkLimit >= 0) { 899 // complete the chunk and transform it 900 chunk.append(u, 0, chunkLimit); 901 u.remove(0, chunkLimit); 902 t->transliterate(chunk); 903 904 // append the transformation result to the result and empty the chunk 905 out.append(chunk); 906 chunk.remove(); 907 } else { 908 // continue collecting the chunk 909 chunk.append(u); 910 break; 911 } 912 } while (!u.isEmpty()); 913 914 u = out; 915 ulen = u.length(); 916 } 917 #endif 918 919 // add a U+FEFF Unicode signature character if requested 920 // and possible/necessary 921 if (sig > 0) { 922 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) { 923 u.insert(0, (UChar)uSig); 924 925 if (useOffsets) { 926 // insert a pseudo-offset into fromoffsets[] as well 927 // to keep the array parallel with the UChars 928 memmove(fromoffsets + 1, fromoffsets, ulen * 4); 929 fromoffsets[0] = -1; 930 } 931 932 // account for the additional UChar and offset 933 ++ulen; 934 } 935 sig = 0; 936 } 937 938 // Convert the Unicode buffer into the destination codepage 939 // Again 'bufp' will be placed behind the last converted character 940 // And 'unibufp' will be placed behind the last converted unicode character 941 // At the last conversion flush should be set to true to ensure that 942 // all characters left get converted 943 944 unibuf = unibufbp = u.getBuffer(); 945 946 do { 947 bufp = outbuf; 948 949 // Use fromSawEndOfBytes in addition to the flush flag - 950 // it indicates whether the intermediate Unicode string 951 // contains the very last UChars for the very last input bytes. 952 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz, 953 &unibufbp, 954 unibuf + ulen, 955 NULL, (UBool)(flush && fromSawEndOfBytes), &err); 956 957 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done 958 // converting all of the intermediate UChars. 959 // See comment for fromSawEndOfBytes. 960 toSawEndOfUnicode = (UBool)U_SUCCESS(err); 961 962 if (err == U_BUFFER_OVERFLOW_ERROR) { 963 err = U_ZERO_ERROR; 964 } else if (U_FAILURE(err)) { 965 UChar errorUChars[4]; 966 const char *errtag; 967 char pos[32]; 968 UChar32 c; 969 int8_t i, length, errorLength; 970 971 UErrorCode localError = U_ZERO_ERROR; 972 errorLength = (int8_t)LENGTHOF(errorUChars); 973 ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError); 974 if (U_FAILURE(localError) || errorLength == 0) { 975 // need at least 1 so that we don't access beyond the length of fromoffsets[] 976 errorLength = 1; 977 } 978 979 int32_t ferroffset; 980 981 if (useOffsets) { 982 // Unicode buffer offset of the start of the error UChars 983 ferroffset = (int32_t)((unibufbp - unibuf) - errorLength); 984 if (ferroffset < 0) { 985 // approximation - the character started in the previous Unicode buffer 986 ferroffset = 0; 987 } 988 989 // get the corresponding byte offset out of fromoffsets[] 990 // go back if the offset is not known for some of the UChars 991 int32_t fromoffset; 992 do { 993 fromoffset = fromoffsets[ferroffset]; 994 } while (fromoffset < 0 && --ferroffset >= 0); 995 996 // total input file offset = 997 // input file offset of the current byte buffer + 998 // byte buffer offset of where the current Unicode buffer is converted from + 999 // fromoffsets[Unicode offset] 1000 ferroffset = infoffset + (prevbufp - buf) + fromoffset; 1001 errtag = "problemCvtFromU"; 1002 } else { 1003 // Do not use fromoffsets if (t != NULL) because the Unicode text may 1004 // be different from what the offsets refer to. 1005 1006 // output file offset 1007 ferroffset = (int32_t)(outfoffset + (bufp - outbuf)); 1008 errtag = "problemCvtFromUOut"; 1009 } 1010 1011 length = (int8_t)sprintf(pos, "%u", (int)ferroffset); 1012 1013 // output the code points that caused the error 1014 UnicodeString str; 1015 for (i = 0; i < errorLength;) { 1016 if (i > 0) { 1017 str.append((UChar)uSP); 1018 } 1019 U16_NEXT(errorUChars, i, errorLength, c); 1020 if (c >= 0x100000) { 1021 str.append(nibbleToHex((uint8_t)(c >> 20))); 1022 } 1023 if (c >= 0x10000) { 1024 str.append(nibbleToHex((uint8_t)(c >> 16))); 1025 } 1026 str.append(nibbleToHex((uint8_t)(c >> 12))); 1027 str.append(nibbleToHex((uint8_t)(c >> 8))); 1028 str.append(nibbleToHex((uint8_t)(c >> 4))); 1029 str.append(nibbleToHex((uint8_t)c)); 1030 } 1031 1032 initMsg(pname); 1033 u_wmsg(stderr, errtag, 1034 UnicodeString(pos, length, "").getTerminatedBuffer(), 1035 str.getTerminatedBuffer(), 1036 u_wmsg_errorName(err)); 1037 u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer()); 1038 1039 willexit = TRUE; 1040 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ 1041 } 1042 1043 // Replaced a check for whether the intermediate Unicode characters were all consumed by 1044 // looping until they are; message key "premEnd" now obsolete. 1045 1046 // Finally, write the converted buffer to the output file 1047 size_t outlen = (size_t) (bufp - outbuf); 1048 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile)); 1049 if (wr != outlen) { 1050 UnicodeString str(strerror(errno)); 1051 initMsg(pname); 1052 u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer()); 1053 willexit = TRUE; 1054 } 1055 1056 if (willexit) { 1057 goto error_exit; 1058 } 1059 } while (!toSawEndOfUnicode); 1060 } while (!fromSawEndOfBytes); 1061 } while (!flush); // Stop when we have flushed the 1062 // converters (this means that it's 1063 // the end of output) 1064 1065 goto normal_exit; 1066 1067 error_exit: 1068 ret = FALSE; 1069 1070 normal_exit: 1071 // Cleanup. 1072 1073 ucnv_close(convfrom); 1074 ucnv_close(convto); 1075 1076 #if !UCONFIG_NO_TRANSLITERATION 1077 delete t; 1078 #endif 1079 1080 if (infile != stdin) { 1081 fclose(infile); 1082 } 1083 1084 return ret; 1085 } 1086 1087 static void usage(const char *pname, int ecode) { 1088 const UChar *msg; 1089 int32_t msgLen; 1090 UErrorCode err = U_ZERO_ERROR; 1091 FILE *fp = ecode ? stderr : stdout; 1092 int res; 1093 1094 initMsg(pname); 1095 msg = 1096 ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord", 1097 &msgLen, &err); 1098 UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1)); 1099 UnicodeString mname(msg, msgLen + 1); 1100 1101 res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer()); 1102 if (!ecode) { 1103 if (!res) { 1104 fputc('\n', fp); 1105 } 1106 if (!u_wmsg(fp, "help")) { 1107 /* Now dump callbacks and finish. */ 1108 1109 int i, count = 1110 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks); 1111 for (i = 0; i < count; ++i) { 1112 fprintf(fp, " %s", transcode_callbacks[i].name); 1113 } 1114 fputc('\n', fp); 1115 } 1116 } 1117 1118 exit(ecode); 1119 } 1120 1121 extern int 1122 main(int argc, char **argv) 1123 { 1124 FILE *outfile; 1125 int ret = 0; 1126 1127 size_t bufsz = DEFAULT_BUFSZ; 1128 1129 const char *fromcpage = 0; 1130 const char *tocpage = 0; 1131 const char *translit = 0; 1132 const char *outfilestr = 0; 1133 UBool fallback = FALSE; 1134 1135 UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP; 1136 const void *fromuctxt = 0; 1137 UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP; 1138 const void *touctxt = 0; 1139 1140 char **iter, **remainArgv, **remainArgvLimit; 1141 char **end = argv + argc; 1142 1143 const char *pname; 1144 1145 UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE; 1146 const char *printName = 0; 1147 1148 UBool verbose = FALSE; 1149 UErrorCode status = U_ZERO_ERROR; 1150 1151 ConvertFile cf; 1152 1153 /* Initialize ICU */ 1154 u_init(&status); 1155 if (U_FAILURE(status)) { 1156 fprintf(stderr, "%s: can not initialize ICU. status = %s\n", 1157 argv[0], u_errorName(status)); 1158 exit(1); 1159 } 1160 1161 // Get and prettify pname. 1162 pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); 1163 #ifdef U_WINDOWS 1164 if (!pname) { 1165 pname = uprv_strrchr(*argv, '/'); 1166 } 1167 #endif 1168 if (!pname) { 1169 pname = *argv; 1170 } else { 1171 ++pname; 1172 } 1173 1174 // First, get the arguments from command-line 1175 // to know the codepages to convert between 1176 1177 remainArgv = remainArgvLimit = argv + 1; 1178 for (iter = argv + 1; iter != end; iter++) { 1179 // Check for from charset 1180 if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) { 1181 iter++; 1182 if (iter != end) 1183 fromcpage = *iter; 1184 else 1185 usage(pname, 1); 1186 } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) { 1187 iter++; 1188 if (iter != end) 1189 tocpage = *iter; 1190 else 1191 usage(pname, 1); 1192 } else if (strcmp("-x", *iter) == 0) { 1193 iter++; 1194 if (iter != end) 1195 translit = *iter; 1196 else 1197 usage(pname, 1); 1198 } else if (!strcmp("--fallback", *iter)) { 1199 fallback = TRUE; 1200 } else if (!strcmp("--no-fallback", *iter)) { 1201 fallback = FALSE; 1202 } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) { 1203 iter++; 1204 if (iter != end) { 1205 bufsz = atoi(*iter); 1206 if ((int) bufsz <= 0) { 1207 initMsg(pname); 1208 UnicodeString str(*iter); 1209 initMsg(pname); 1210 u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer()); 1211 return 3; 1212 } 1213 } else { 1214 usage(pname, 1); 1215 } 1216 } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) { 1217 if (printTranslits) { 1218 usage(pname, 1); 1219 } 1220 printConvs = TRUE; 1221 } else if (strcmp("--default-code", *iter) == 0) { 1222 if (printTranslits) { 1223 usage(pname, 1); 1224 } 1225 printName = ucnv_getDefaultName(); 1226 } else if (strcmp("--list-code", *iter) == 0) { 1227 if (printTranslits) { 1228 usage(pname, 1); 1229 } 1230 1231 iter++; 1232 if (iter != end) { 1233 UErrorCode e = U_ZERO_ERROR; 1234 printName = ucnv_getAlias(*iter, 0, &e); 1235 if (U_FAILURE(e) || !printName) { 1236 UnicodeString str(*iter); 1237 initMsg(pname); 1238 u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer()); 1239 return 2; 1240 } 1241 } else 1242 usage(pname, 1); 1243 } else if (strcmp("--canon", *iter) == 0) { 1244 printCanon = TRUE; 1245 } else if (strcmp("-L", *iter) == 0 1246 || !strcmp("--list-transliterators", *iter)) { 1247 if (printConvs) { 1248 usage(pname, 1); 1249 } 1250 printTranslits = TRUE; 1251 } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter) 1252 || !strcmp("--help", *iter)) { 1253 usage(pname, 0); 1254 } else if (!strcmp("-c", *iter)) { 1255 fromucallback = UCNV_FROM_U_CALLBACK_SKIP; 1256 } else if (!strcmp("--to-callback", *iter)) { 1257 iter++; 1258 if (iter != end) { 1259 const struct callback_ent *cbe = findCallback(*iter); 1260 if (cbe) { 1261 fromucallback = cbe->fromu; 1262 fromuctxt = cbe->fromuctxt; 1263 } else { 1264 UnicodeString str(*iter); 1265 initMsg(pname); 1266 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1267 return 4; 1268 } 1269 } else { 1270 usage(pname, 1); 1271 } 1272 } else if (!strcmp("--from-callback", *iter)) { 1273 iter++; 1274 if (iter != end) { 1275 const struct callback_ent *cbe = findCallback(*iter); 1276 if (cbe) { 1277 toucallback = cbe->tou; 1278 touctxt = cbe->touctxt; 1279 } else { 1280 UnicodeString str(*iter); 1281 initMsg(pname); 1282 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1283 return 4; 1284 } 1285 } else { 1286 usage(pname, 1); 1287 } 1288 } else if (!strcmp("-i", *iter)) { 1289 toucallback = UCNV_TO_U_CALLBACK_SKIP; 1290 } else if (!strcmp("--callback", *iter)) { 1291 iter++; 1292 if (iter != end) { 1293 const struct callback_ent *cbe = findCallback(*iter); 1294 if (cbe) { 1295 fromucallback = cbe->fromu; 1296 fromuctxt = cbe->fromuctxt; 1297 toucallback = cbe->tou; 1298 touctxt = cbe->touctxt; 1299 } else { 1300 UnicodeString str(*iter); 1301 initMsg(pname); 1302 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1303 return 4; 1304 } 1305 } else { 1306 usage(pname, 1); 1307 } 1308 } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) { 1309 verbose = FALSE; 1310 } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) { 1311 verbose = TRUE; 1312 } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) { 1313 printf("%s v2.1 ICU " U_ICU_VERSION "\n", pname); 1314 return 0; 1315 } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) { 1316 ++iter; 1317 if (iter != end && !outfilestr) { 1318 outfilestr = *iter; 1319 } else { 1320 usage(pname, 1); 1321 } 1322 } else if (0 == strcmp("--add-signature", *iter)) { 1323 cf.signature = 1; 1324 } else if (0 == strcmp("--remove-signature", *iter)) { 1325 cf.signature = -1; 1326 } else if (**iter == '-' && (*iter)[1]) { 1327 usage(pname, 1); 1328 } else { 1329 // move a non-option up in argv[] 1330 *remainArgvLimit++ = *iter; 1331 } 1332 } 1333 1334 if (printConvs || printName) { 1335 return printConverters(pname, printName, printCanon) ? 2 : 0; 1336 } else if (printTranslits) { 1337 return printTransliterators(printCanon) ? 3 : 0; 1338 } 1339 1340 if (!fromcpage || !uprv_strcmp(fromcpage, "-")) { 1341 fromcpage = ucnv_getDefaultName(); 1342 } 1343 if (!tocpage || !uprv_strcmp(tocpage, "-")) { 1344 tocpage = ucnv_getDefaultName(); 1345 } 1346 1347 // Open the correct output file or connect to stdout for reading input 1348 if (outfilestr != 0 && strcmp(outfilestr, "-")) { 1349 outfile = fopen(outfilestr, "wb"); 1350 if (outfile == 0) { 1351 UnicodeString str1(outfilestr, ""); 1352 UnicodeString str2(strerror(errno), ""); 1353 initMsg(pname); 1354 u_wmsg(stderr, "cantCreateOutputF", 1355 str1.getBuffer(), str2.getBuffer()); 1356 return 1; 1357 } 1358 } else { 1359 outfilestr = "-"; 1360 outfile = stdout; 1361 #ifdef USE_FILENO_BINARY_MODE 1362 if (setmode(fileno(outfile), O_BINARY) == -1) { 1363 u_wmsg(stderr, "cantSetOutBinMode"); 1364 exit(-1); 1365 } 1366 #endif 1367 } 1368 1369 /* Loop again on the arguments to find all the input files, and 1370 convert them. */ 1371 1372 cf.setBufferSize(bufsz); 1373 1374 if(remainArgv < remainArgvLimit) { 1375 for (iter = remainArgv; iter != remainArgvLimit; iter++) { 1376 if (!cf.convertFile( 1377 pname, fromcpage, toucallback, touctxt, tocpage, 1378 fromucallback, fromuctxt, fallback, translit, *iter, 1379 outfile, verbose) 1380 ) { 1381 goto error_exit; 1382 } 1383 } 1384 } else { 1385 if (!cf.convertFile( 1386 pname, fromcpage, toucallback, touctxt, tocpage, 1387 fromucallback, fromuctxt, fallback, translit, 0, 1388 outfile, verbose) 1389 ) { 1390 goto error_exit; 1391 } 1392 } 1393 1394 goto normal_exit; 1395 error_exit: 1396 #if !UCONFIG_NO_LEGACY_CONVERSION 1397 ret = 1; 1398 #else 1399 fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n"); 1400 #endif 1401 normal_exit: 1402 1403 if (outfile != stdout) { 1404 fclose(outfile); 1405 } 1406 1407 return ret; 1408 } 1409 1410 1411 /* 1412 * Hey, Emacs, please set the following: 1413 * 1414 * Local Variables: 1415 * indent-tabs-mode: nil 1416 * End: 1417 * 1418 */ 1419