Home | History | Annotate | Download | only in uconv
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*****************************************************************************
      4 *
      5 *   Copyright (C) 1999-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *
      8 ******************************************************************************/
      9 
     10 /*
     11  * uconv(1): an iconv(1)-like converter using ICU.
     12  *
     13  * Original code by Jonas Utterström <jonas.utterstrom (at) vittran.norrnod.se>
     14  * contributed in 1999.
     15  *
     16  * Conversion to the C conversion API and many improvements by
     17  * Yves Arrouye <yves (at) realnames.com>, current maintainer.
     18  *
     19  * Markus Scherer maintainer from 2003.
     20  * See source code repository history for changes.
     21  */
     22 
     23 #include <unicode/utypes.h>
     24 #include <unicode/putil.h>
     25 #include <unicode/ucnv.h>
     26 #include <unicode/uenum.h>
     27 #include <unicode/unistr.h>
     28 #include <unicode/translit.h>
     29 #include <unicode/uset.h>
     30 #include <unicode/uclean.h>
     31 #include <unicode/utf16.h>
     32 
     33 #include <stdio.h>
     34 #include <errno.h>
     35 #include <string.h>
     36 #include <stdlib.h>
     37 
     38 #include "cmemory.h"
     39 #include "cstring.h"
     40 #include "ustrfmt.h"
     41 
     42 #include "unicode/uwmsg.h"
     43 
     44 U_NAMESPACE_USE
     45 
/*
 * Windows-only setup: pull in the low-level I/O headers and define
 * USE_FILENO_BINARY_MODE, which later code tests before calling
 * setmode(fileno(...), O_BINARY) on a standard stream.
 * Skipped when the compiler is in strict ANSI mode.
 */
#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
#include <io.h>
#include <fcntl.h>
/* NOTE(review): this inner test repeats part of the enclosing condition,
   so it is always true when reached. */
#if U_PLATFORM_USES_ONLY_WIN32_API
#define USE_FILENO_BINARY_MODE 1
/* Windows likes to rename Unix-like functions */
#ifndef fileno
#define fileno _fileno
#endif
#ifndef setmode
#define setmode _setmode
#endif
#ifndef O_BINARY
#define O_BINARY _O_BINARY
#endif
#endif
#endif
     63 
/*
 * When UCONVMSG_LINK is defined, the message bundle data is expected to be
 * linked into the program as the symbol uconvmsg_dat; initMsg() hands it to
 * udata_setAppData().
 */
#ifdef UCONVMSG_LINK
/* below from the README */
#include "unicode/utypes.h"
#include "unicode/udata.h"
U_CFUNC char uconvmsg_dat[];
#endif
     70 
/* Default buffer size (presumably in bytes; its use is outside this part of the file). */
#define DEFAULT_BUFSZ   4096
/* Name of the message resource bundle, as passed to u_wmsg_setPath(). */
#define UCONVMSG "uconvmsg"

/* Set by initMsg() from the return value of u_wmsg_setPath(). */
static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
     75 
     76 /*
     77  * Initialize the message bundle so that message strings can be fetched
     78  * by u_wmsg().
     79  *
     80  */
     81 
     82 static void initMsg(const char *pname) {
     83     static int ps = 0;
     84 
     85     if (!ps) {
     86         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
     87         UErrorCode err = U_ZERO_ERROR;
     88 
     89         ps = 1;
     90 
     91         /* Set up our static data - if any */
     92 #if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */
     93         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
     94         if (U_FAILURE(err)) {
     95           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
     96                   pname, u_errorName(err));
     97           err = U_ZERO_ERROR; /* It may still fail */
     98         }
     99 #endif
    100 
    101         /* Get messages. */
    102         gBundle = u_wmsg_setPath(UCONVMSG, &err);
    103         if (U_FAILURE(err)) {
    104             fprintf(stderr,
    105                     "%s: warning: couldn't open bundle %s: %s\n",
    106                     pname, UCONVMSG, u_errorName(err));
    107 #ifdef UCONVMSG_LINK
    108             fprintf(stderr,
    109                     "%s: setAppData was called, internal data %s failed to load\n",
    110                         pname, UCONVMSG);
    111 #endif
    112 
    113             err = U_ZERO_ERROR;
    114             /* that was try #1, try again with a path */
    115             uprv_strcpy(dataPath, u_getDataDirectory());
    116             uprv_strcat(dataPath, U_FILE_SEP_STRING);
    117             uprv_strcat(dataPath, UCONVMSG);
    118 
    119             gBundle = u_wmsg_setPath(dataPath, &err);
    120             if (U_FAILURE(err)) {
    121                 fprintf(stderr,
    122                     "%s: warning: still couldn't open bundle %s: %s\n",
    123                     pname, dataPath, u_errorName(err));
    124                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
    125             }
    126         }
    127     }
    128 }
    129 
/* Mapping of callback names to the callbacks passed to the converter
   API.

   Each entry pairs a name with a from-Unicode callback and a to-Unicode
   callback, plus the context value handed to each of them. Entries are
   looked up by name, ignoring case, in findCallback(). */

static struct callback_ent {
    const char *name;                   /* Name used to look the entry up. */
    UConverterFromUCallback fromu;      /* Callback for Unicode -> codepage. */
    const void *fromuctxt;              /* Context passed along with fromu. */
    UConverterToUCallback tou;          /* Callback for codepage -> Unicode. */
    const void *touctxt;                /* Context passed along with tou. */
} transcode_callbacks[] = {
    { "substitute",
      UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
      UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
    { "skip",
      UCNV_FROM_U_CALLBACK_SKIP, 0,
      UCNV_TO_U_CALLBACK_SKIP, 0 },
    { "stop",
      UCNV_FROM_U_CALLBACK_STOP, 0,
      UCNV_TO_U_CALLBACK_STOP, 0 },
    /* The escape callbacks differ only in the context value selecting
       the escape notation; plain "escape" passes no context. */
    { "escape",
      UCNV_FROM_U_CALLBACK_ESCAPE, 0,
      UCNV_TO_U_CALLBACK_ESCAPE, 0},
    { "escape-icu",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
    { "escape-java",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
    { "escape-c",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
    /* "escape-xml" uses the same context as "escape-xml-hex". */
    { "escape-xml",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    { "escape-xml-hex",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    { "escape-xml-dec",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
    { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
};
    173 
    174 /* Return a pointer to a callback record given its name. */
    175 
    176 static const struct callback_ent *findCallback(const char *name) {
    177     int i, count =
    178         UPRV_LENGTHOF(transcode_callbacks);
    179 
    180     /* We'll do a linear search, there aren't many of them and bsearch()
    181        may not be that portable. */
    182 
    183     for (i = 0; i < count; ++i) {
    184         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
    185             return &transcode_callbacks[i];
    186         }
    187     }
    188 
    189     return 0;
    190 }
    191 
    192 /* Print converter information. If lookfor is set, only that converter will
    193    be printed, otherwise all converters will be printed. If canon is non
    194    zero, tags and aliases for each converter are printed too, in the format
    195    expected for convrters.txt(5). */
    196 
    197 static int printConverters(const char *pname, const char *lookfor,
    198     UBool canon)
    199 {
    200     UErrorCode err = U_ZERO_ERROR;
    201     int32_t num;
    202     uint16_t num_stds;
    203     const char **stds;
    204 
    205     /* If there is a specified name, just handle that now. */
    206 
    207     if (lookfor) {
    208         if (!canon) {
    209             printf("%s\n", lookfor);
    210             return 0;
    211         } else {
    212         /*  Because we are printing a canonical name, we need the
    213             true converter name. We've done that already except for
    214             the default name (because we want to print the exact
    215             name one would get when calling ucnv_getDefaultName()
    216             in non-canon mode). But since we do not know at this
    217             point if we have the default name or something else, we
    218             need to normalize again to the canonical converter
    219             name. */
    220 
    221             const char *truename = ucnv_getAlias(lookfor, 0, &err);
    222             if (U_SUCCESS(err)) {
    223                 lookfor = truename;
    224             } else {
    225                 err = U_ZERO_ERROR;
    226             }
    227         }
    228     }
    229 
    230     /* Print converter names. We come here for one of two reasons: we
    231        are printing all the names (lookfor was null), or we have a
    232        single converter to print but in canon mode, hence we need to
    233        get to it in order to print everything. */
    234 
    235     num = ucnv_countAvailable();
    236     if (num <= 0) {
    237         initMsg(pname);
    238         u_wmsg(stderr, "cantGetNames");
    239         return -1;
    240     }
    241     if (lookfor) {
    242         num = 1;                /* We know where we want to be. */
    243     }
    244 
    245     num_stds = ucnv_countStandards();
    246     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    247     if (!stds) {
    248         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
    249         return -1;
    250     } else {
    251         uint16_t s;
    252 
    253         if (canon) {
    254             printf("{ ");
    255         }
    256         for (s = 0; s < num_stds; ++s) {
    257             stds[s] = ucnv_getStandard(s, &err);
    258             if (canon) {
    259                 printf("%s ", stds[s]);
    260             }
    261             if (U_FAILURE(err)) {
    262                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
    263                 goto error_cleanup;
    264             }
    265         }
    266         if (canon) {
    267             puts("}");
    268         }
    269     }
    270 
    271     for (int32_t i = 0; i < num; i++) {
    272         const char *name;
    273         uint16_t num_aliases;
    274 
    275         /* Set the name either to what we are looking for, or
    276         to the current converter name. */
    277 
    278         if (lookfor) {
    279             name = lookfor;
    280         } else {
    281             name = ucnv_getAvailableName(i);
    282         }
    283 
    284         /* Get all the aliases associated to the name. */
    285 
    286         err = U_ZERO_ERROR;
    287         num_aliases = ucnv_countAliases(name, &err);
    288         if (U_FAILURE(err)) {
    289             printf("%s", name);
    290 
    291             UnicodeString str(name, "");
    292             putchar('\t');
    293             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    294                 u_wmsg_errorName(err));
    295             goto error_cleanup;
    296         } else {
    297             uint16_t a, s, t;
    298 
    299             /* Write all the aliases and their tags. */
    300 
    301             for (a = 0; a < num_aliases; ++a) {
    302                 const char *alias = ucnv_getAlias(name, a, &err);
    303 
    304                 if (U_FAILURE(err)) {
    305                     UnicodeString str(name, "");
    306                     putchar('\t');
    307                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    308                         u_wmsg_errorName(err));
    309                     goto error_cleanup;
    310                 }
    311 
    312                 /* Print the current alias so that it looks right. */
    313                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
    314                                  alias,
    315                                  (canon ? "" : " "));
    316 
    317                 /* Look (slowly, linear searching) for a tag. */
    318 
    319                 if (canon) {
    320                     /* -1 to skip the last standard */
    321                     for (s = t = 0; s < num_stds-1; ++s) {
    322                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
    323                         if (U_SUCCESS(err)) {
    324                             /* List the standard tags */
    325                             const char *standardName;
    326                             UBool isFirst = TRUE;
    327                             UErrorCode enumError = U_ZERO_ERROR;
    328                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
    329                                 /* See if this alias is supported by this standard. */
    330                                 if (!strcmp(standardName, alias)) {
    331                                     if (!t) {
    332                                         printf(" {");
    333                                         t = 1;
    334                                     }
    335                                     /* Print a * after the default standard name */
    336                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
    337                                 }
    338                                 isFirst = FALSE;
    339                             }
    340                         }
    341                     }
    342                     if (t) {
    343                         printf(" }");
    344                     }
    345                 }
    346                 /* Terminate this entry. */
    347                 if (canon) {
    348                     puts("");
    349                 }
    350 
    351                 /* Move on. */
    352             }
    353             /* Terminate this entry. */
    354             if (!canon) {
    355                 puts("");
    356             }
    357         }
    358     }
    359 
    360     /* Free temporary data. */
    361 
    362     uprv_free(stds);
    363 
    364     /* Success. */
    365 
    366     return 0;
    367 error_cleanup:
    368     uprv_free(stds);
    369     return -1;
    370 }
    371 
    372 /* Print all available transliterators. If canon is non zero, print
    373    one transliterator per line. */
    374 
    375 static int printTransliterators(UBool canon)
    376 {
    377 #if UCONFIG_NO_TRANSLITERATION
    378     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    379     return 1;
    380 #else
    381     UErrorCode status = U_ZERO_ERROR;
    382     UEnumeration *ids = utrans_openIDs(&status);
    383     int32_t i, numtrans = uenum_count(ids, &status);
    384 
    385     char sepchar = canon ? '\n' : ' ';
    386 
    387     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
    388     	int32_t len;
    389     	const char *nextTrans = uenum_next(ids, &len, &status);
    390 
    391         printf("%s", nextTrans);
    392         if (i < numtrans - 1) {
    393             putchar(sepchar);
    394         }
    395     }
    396 
    397     uenum_close(ids);
    398 
    399     /* Add a terminating newline if needed. */
    400 
    401     if (sepchar != '\n') {
    402         putchar('\n');
    403     }
    404 
    405     /* Success. */
    406 
    407     return 0;
    408 #endif
    409 }
    410 
    411 enum {
    412     uSP = 0x20,         // space
    413     uCR = 0xd,          // carriage return
    414     uLF = 0xa,          // line feed
    415     uNL = 0x85,         // newline
    416     uLS = 0x2028,       // line separator
    417     uPS = 0x2029,       // paragraph separator
    418     uSig = 0xfeff       // signature/BOM character
    419 };
    420 
    421 static inline int32_t
    422 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
    423     // find one of
    424     // CR, LF, CRLF, NL, LS, PS
    425     // for paragraph ends (see UAX #13/Unicode 4)
    426     // and include it in the chunk
    427     // all of these characters are on the BMP
    428     // do not include FF or VT in case they are part of a paragraph
    429     // (important for bidi contexts)
    430     static const UChar paraEnds[] = {
    431         0xd, 0xa, 0x85, 0x2028, 0x2029
    432     };
    433     enum {
    434         iCR, iLF, iNL, iLS, iPS, iCount
    435     };
    436 
    437     // first, see if there is a CRLF split between prev and s
    438     if (prev.endsWith(paraEnds + iCR, 1)) {
    439         if (s.startsWith(paraEnds + iLF, 1)) {
    440             return 1; // split CRLF, include the LF
    441         } else if (!s.isEmpty()) {
    442             return 0; // complete the last chunk
    443         } else {
    444             return -1; // wait for actual further contents to arrive
    445         }
    446     }
    447 
    448     const UChar *u = s.getBuffer(), *limit = u + s.length();
    449     UChar c;
    450 
    451     while (u < limit) {
    452         c = *u++;
    453         if (
    454             ((c < uSP) && (c == uCR || c == uLF)) ||
    455             (c == uNL) ||
    456             ((c & uLS) == uLS)
    457         ) {
    458             if (c == uCR) {
    459                 // check for CRLF
    460                 if (u == limit) {
    461                     return -1; // LF may be in the next chunk
    462                 } else if (*u == uLF) {
    463                     ++u; // include the LF in this chunk
    464                 }
    465             }
    466             return (int32_t)(u - s.getBuffer());
    467         }
    468     }
    469 
    470     return -1; // continue collecting the chunk
    471 }
    472 
    473 enum {
    474     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
    475     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
    476     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
    477 };
    478 
    479 static inline UChar
    480 nibbleToHex(uint8_t n) {
    481     n &= 0xf;
    482     return
    483         n <= 9 ?
    484             (UChar)(0x30 + n) :
    485             (UChar)((0x61 - 10) + n);
    486 }
    487 
    488 // check the converter's Unicode signature properties;
    489 // the fromUnicode side of the converter must be in its initial state
    490 // and will be reset again if it was used
    491 static int32_t
    492 cnvSigType(UConverter *cnv) {
    493     UErrorCode err;
    494     int32_t result;
    495 
    496     // test if the output charset can convert U+FEFF
    497     USet *set = uset_open(1, 0);
    498     err = U_ZERO_ERROR;
    499     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
    500     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
    501         result = CNV_WITH_FEFF;
    502     } else {
    503         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
    504     }
    505     uset_close(set);
    506 
    507     if (result == CNV_WITH_FEFF) {
    508         // test if the output charset emits a signature anyway
    509         const UChar a[1] = { 0x61 }; // "a"
    510         const UChar *in;
    511 
    512         char buffer[20];
    513         char *out;
    514 
    515         in = a;
    516         out = buffer;
    517         err = U_ZERO_ERROR;
    518         ucnv_fromUnicode(cnv,
    519             &out, buffer + sizeof(buffer),
    520             &in, a + 1,
    521             NULL, TRUE, &err);
    522         ucnv_resetFromUnicode(cnv);
    523 
    524         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
    525             U_SUCCESS(err)
    526         ) {
    527             result = CNV_ADDS_FEFF;
    528         }
    529     }
    530 
    531     return result;
    532 }
    533 
    534 class ConvertFile {
    535 public:
    536     ConvertFile() :
    537         buf(NULL), outbuf(NULL), fromoffsets(NULL),
    538         bufsz(0), signature(0) {}
    539 
    540     void
    541     setBufferSize(size_t bufferSize) {
    542         bufsz = bufferSize;
    543 
    544         buf = new char[2 * bufsz];
    545         outbuf = buf + bufsz;
    546 
    547         // +1 for an added U+FEFF in the intermediate Unicode buffer
    548         fromoffsets = new int32_t[bufsz + 1];
    549     }
    550 
    551     ~ConvertFile() {
    552         delete [] buf;
    553         delete [] fromoffsets;
    554     }
    555 
    556     UBool convertFile(const char *pname,
    557                       const char *fromcpage,
    558                       UConverterToUCallback toucallback,
    559                       const void *touctxt,
    560                       const char *tocpage,
    561                       UConverterFromUCallback fromucallback,
    562                       const void *fromuctxt,
    563                       UBool fallback,
    564                       const char *translit,
    565                       const char *infilestr,
    566                       FILE * outfile, int verbose);
    567 private:
    568     friend int main(int argc, char **argv);
    569 
    570     char *buf, *outbuf;
    571     int32_t *fromoffsets;
    572 
    573     size_t bufsz;
    574     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
    575 };
    576 
    577 // Convert a file from one encoding to another
    578 UBool
    579 ConvertFile::convertFile(const char *pname,
    580                          const char *fromcpage,
    581                          UConverterToUCallback toucallback,
    582                          const void *touctxt,
    583                          const char *tocpage,
    584                          UConverterFromUCallback fromucallback,
    585                          const void *fromuctxt,
    586                          UBool fallback,
    587                          const char *translit,
    588                          const char *infilestr,
    589                          FILE * outfile, int verbose)
    590 {
    591     FILE *infile;
    592     UBool ret = TRUE;
    593     UConverter *convfrom = 0;
    594     UConverter *convto = 0;
    595     UErrorCode err = U_ZERO_ERROR;
    596     UBool flush;
    597     UBool closeFile = FALSE;
    598     const char *cbufp, *prevbufp;
    599     char *bufp;
    600 
    601     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
    602 
    603     const UChar *unibuf, *unibufbp;
    604     UChar *unibufp;
    605 
    606     size_t rd, wr;
    607 
    608 #if !UCONFIG_NO_TRANSLITERATION
    609     Transliterator *t = 0;      // Transliterator acting on Unicode data.
    610     UnicodeString chunk;        // One chunk of the text being collected for transformation.
    611 #endif
    612     UnicodeString u;            // String to do the transliteration.
    613     int32_t ulen;
    614 
    615     // use conversion offsets for error messages
    616     // unless a transliterator is used -
    617     // a text transformation will reorder characters in unpredictable ways
    618     UBool useOffsets = TRUE;
    619 
    620     // Open the correct input file or connect to stdin for reading input
    621 
    622     if (infilestr != 0 && strcmp(infilestr, "-")) {
    623         infile = fopen(infilestr, "rb");
    624         if (infile == 0) {
    625             UnicodeString str1(infilestr, "");
    626             str1.append((UChar32) 0);
    627             UnicodeString str2(strerror(errno), "");
    628             str2.append((UChar32) 0);
    629             initMsg(pname);
    630             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
    631             return FALSE;
    632         }
    633         closeFile = TRUE;
    634     } else {
    635         infilestr = "-";
    636         infile = stdin;
    637 #ifdef USE_FILENO_BINARY_MODE
    638         if (setmode(fileno(stdin), O_BINARY) == -1) {
    639             initMsg(pname);
    640             u_wmsg(stderr, "cantSetInBinMode");
    641             return FALSE;
    642         }
    643 #endif
    644     }
    645 
    646     if (verbose) {
    647         fprintf(stderr, "%s:\n", infilestr);
    648     }
    649 
    650 #if !UCONFIG_NO_TRANSLITERATION
    651     // Create transliterator as needed.
    652 
    653     if (translit != NULL && *translit) {
    654         UParseError parse;
    655         UnicodeString str(translit), pestr;
    656 
    657         /* Create from rules or by ID as needed. */
    658 
    659         parse.line = -1;
    660 
    661         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
    662             t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err);
    663         } else {
    664             t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err);
    665         }
    666 
    667         if (U_FAILURE(err)) {
    668             str.append((UChar32) 0);
    669             initMsg(pname);
    670 
    671             if (parse.line >= 0) {
    672                 UChar linebuf[20], offsetbuf[20];
    673                 uprv_itou(linebuf, 20, parse.line, 10, 0);
    674                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
    675                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
    676                     u_wmsg_errorName(err), linebuf, offsetbuf);
    677             } else {
    678                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
    679                     u_wmsg_errorName(err));
    680             }
    681 
    682             if (t) {
    683                 delete t;
    684                 t = 0;
    685             }
    686             goto error_exit;
    687         }
    688 
    689         useOffsets = FALSE;
    690     }
    691 #endif
    692 
    693     // Create codepage converter. If the codepage or its aliases weren't
    694     // available, it returns NULL and a failure code. We also set the
    695     // callbacks, and return errors in the same way.
    696 
    697     convfrom = ucnv_open(fromcpage, &err);
    698     if (U_FAILURE(err)) {
    699         UnicodeString str(fromcpage, "");
    700         initMsg(pname);
    701         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
    702             u_wmsg_errorName(err));
    703         goto error_exit;
    704     }
    705     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    706     if (U_FAILURE(err)) {
    707         initMsg(pname);
    708         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    709         goto error_exit;
    710     }
    711 
    712     convto = ucnv_open(tocpage, &err);
    713     if (U_FAILURE(err)) {
    714         UnicodeString str(tocpage, "");
    715         initMsg(pname);
    716         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
    717             u_wmsg_errorName(err));
    718         goto error_exit;
    719     }
    720     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    721     if (U_FAILURE(err)) {
    722         initMsg(pname);
    723         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    724         goto error_exit;
    725     }
    726     ucnv_setFallback(convto, fallback);
    727 
    728     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
    729     int8_t sig;
    730 
    731     // OK, we can convert now.
    732     sig = signature;
    733     rd = 0;
    734 
    735     do {
    736         willexit = FALSE;
    737 
    738         // input file offset at the beginning of the next buffer
    739         infoffset += rd;
    740 
    741         rd = fread(buf, 1, bufsz, infile);
    742         if (ferror(infile) != 0) {
    743             UnicodeString str(strerror(errno));
    744             initMsg(pname);
    745             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
    746             goto error_exit;
    747         }
    748 
    749         // Convert the read buffer into the new encoding via Unicode.
    750         // After the call 'unibufp' will be placed behind the last
    751         // character that was converted in the 'unibuf'.
    752         // Also the 'cbufp' is positioned behind the last converted
    753         // character.
    754         // At the last conversion in the file, flush should be set to
    755         // true so that we get all characters converted.
    756         //
    757         // The converter must be flushed at the end of conversion so
    758         // that characters on hold also will be written.
    759 
    760         cbufp = buf;
    761         flush = (UBool)(rd != bufsz);
    762 
    763         // convert until the input is consumed
    764         do {
    765             // remember the start of the current byte-to-Unicode conversion
    766             prevbufp = cbufp;
    767 
    768             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
    769 
    770             // Use bufsz instead of u.getCapacity() for the targetLimit
    771             // so that we don't overflow fromoffsets[].
    772             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
    773                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
    774 
    775             ulen = (int32_t)(unibufp - unibuf);
    776             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
    777 
    778             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
    779             // converting all of the input bytes.
    780             // It works like this because ucnv_toUnicode() returns only under the
    781             // following conditions:
    782             // - an error occurred during conversion (an error code is set)
    783             // - the target buffer is filled (the error code indicates an overflow)
    784             // - the source is consumed
    785             // That is, if the error code does not indicate a failure,
    786             // not even an overflow, then the source must be consumed entirely.
    787             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
    788 
    789             if (err == U_BUFFER_OVERFLOW_ERROR) {
    790                 err = U_ZERO_ERROR;
    791             } else if (U_FAILURE(err)) {
    792                 char pos[32], errorBytes[32];
    793                 int8_t i, length, errorLength;
    794 
    795                 UErrorCode localError = U_ZERO_ERROR;
    796                 errorLength = (int8_t)sizeof(errorBytes);
    797                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
    798                 if (U_FAILURE(localError) || errorLength == 0) {
    799                     errorLength = 1;
    800                 }
    801 
    802                 // print the input file offset of the start of the error bytes:
    803                 // input file offset of the current byte buffer +
    804                 // length of the just consumed bytes -
    805                 // length of the error bytes
    806                 length =
    807                     (int8_t)sprintf(pos, "%d",
    808                         (int)(infoffset + (cbufp - buf) - errorLength));
    809 
    810                 // output the bytes that caused the error
    811                 UnicodeString str;
    812                 for (i = 0; i < errorLength; ++i) {
    813                     if (i > 0) {
    814                         str.append((UChar)uSP);
    815                     }
    816                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
    817                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
    818                 }
    819 
    820                 initMsg(pname);
    821                 u_wmsg(stderr, "problemCvtToU",
    822                         UnicodeString(pos, length, "").getTerminatedBuffer(),
    823                         str.getTerminatedBuffer(),
    824                         u_wmsg_errorName(err));
    825 
    826                 willexit = TRUE;
    827                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
    828             }
    829 
    830             // Replaced a check for whether the input was consumed by
    831             // looping until it is; message key "premEndInput" now obsolete.
    832 
    833             if (ulen == 0) {
    834                 continue;
    835             }
    836 
    837             // remove a U+FEFF Unicode signature character if requested
    838             if (sig < 0) {
    839                 if (u.charAt(0) == uSig) {
    840                     u.remove(0, 1);
    841 
    842                     // account for the removed UChar and offset
    843                     --ulen;
    844 
    845                     if (useOffsets) {
    846                         // remove an offset from fromoffsets[] as well
    847                         // to keep the array parallel with the UChars
    848                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
    849                     }
    850 
    851                 }
    852                 sig = 0;
    853             }
    854 
    855 #if !UCONFIG_NO_TRANSLITERATION
    856             // Transliterate/transform if needed.
    857 
    858             // For transformation, we use chunking code -
    859             // collect Unicode input until, for example, an end-of-line,
    860             // then transform and output-convert that and continue collecting.
    861             // This makes the transformation result independent of the buffer size
    862             // while avoiding the slower keyboard mode.
    863             // The end-of-chunk characters are completely included in the
    864             // transformed string in case they are to be transformed themselves.
    865             if (t != NULL) {
    866                 UnicodeString out;
    867                 int32_t chunkLimit;
    868 
    869                 do {
    870                     chunkLimit = getChunkLimit(chunk, u);
    871                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
    872                         // use all of the rest at the end of the text
    873                         chunkLimit = u.length();
    874                     }
    875                     if (chunkLimit >= 0) {
    876                         // complete the chunk and transform it
    877                         chunk.append(u, 0, chunkLimit);
    878                         u.remove(0, chunkLimit);
    879                         t->transliterate(chunk);
    880 
    881                         // append the transformation result to the result and empty the chunk
    882                         out.append(chunk);
    883                         chunk.remove();
    884                     } else {
    885                         // continue collecting the chunk
    886                         chunk.append(u);
    887                         break;
    888                     }
    889                 } while (!u.isEmpty());
    890 
    891                 u = out;
    892                 ulen = u.length();
    893             }
    894 #endif
    895 
    896             // add a U+FEFF Unicode signature character if requested
    897             // and possible/necessary
    898             if (sig > 0) {
    899                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
    900                     u.insert(0, (UChar)uSig);
    901 
    902                     if (useOffsets) {
    903                         // insert a pseudo-offset into fromoffsets[] as well
    904                         // to keep the array parallel with the UChars
    905                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
    906                         fromoffsets[0] = -1;
    907                     }
    908 
    909                     // account for the additional UChar and offset
    910                     ++ulen;
    911                 }
    912                 sig = 0;
    913             }
    914 
    915             // Convert the Unicode buffer into the destination codepage
    916             // Again 'bufp' will be placed behind the last converted character
    917             // And 'unibufp' will be placed behind the last converted unicode character
    918             // At the last conversion flush should be set to true to ensure that
    919             // all characters left get converted
    920 
    921             unibuf = unibufbp = u.getBuffer();
    922 
    923             do {
    924                 bufp = outbuf;
    925 
    926                 // Use fromSawEndOfBytes in addition to the flush flag -
    927                 // it indicates whether the intermediate Unicode string
    928                 // contains the very last UChars for the very last input bytes.
    929                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
    930                                  &unibufbp,
    931                                  unibuf + ulen,
    932                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
    933 
    934                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
    935                 // converting all of the intermediate UChars.
    936                 // See comment for fromSawEndOfBytes.
    937                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
    938 
    939                 if (err == U_BUFFER_OVERFLOW_ERROR) {
    940                     err = U_ZERO_ERROR;
    941                 } else if (U_FAILURE(err)) {
    942                     UChar errorUChars[4];
    943                     const char *errtag;
    944                     char pos[32];
    945                     UChar32 c;
    946                     int8_t i, length, errorLength;
    947 
    948                     UErrorCode localError = U_ZERO_ERROR;
    949                     errorLength = UPRV_LENGTHOF(errorUChars);
    950                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
    951                     if (U_FAILURE(localError) || errorLength == 0) {
    952                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
    953                         errorLength = 1;
    954                     }
    955 
    956                     int32_t ferroffset;
    957 
    958                     if (useOffsets) {
    959                         // Unicode buffer offset of the start of the error UChars
    960                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
    961                         if (ferroffset < 0) {
    962                             // approximation - the character started in the previous Unicode buffer
    963                             ferroffset = 0;
    964                         }
    965 
    966                         // get the corresponding byte offset out of fromoffsets[]
    967                         // go back if the offset is not known for some of the UChars
    968                         int32_t fromoffset;
    969                         do {
    970                             fromoffset = fromoffsets[ferroffset];
    971                         } while (fromoffset < 0 && --ferroffset >= 0);
    972 
    973                         // total input file offset =
    974                         // input file offset of the current byte buffer +
    975                         // byte buffer offset of where the current Unicode buffer is converted from +
    976                         // fromoffsets[Unicode offset]
    977                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
    978                         errtag = "problemCvtFromU";
    979                     } else {
    980                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
    981                         // be different from what the offsets refer to.
    982 
    983                         // output file offset
    984                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
    985                         errtag = "problemCvtFromUOut";
    986                     }
    987 
    988                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
    989 
    990                     // output the code points that caused the error
    991                     UnicodeString str;
    992                     for (i = 0; i < errorLength;) {
    993                         if (i > 0) {
    994                             str.append((UChar)uSP);
    995                         }
    996                         U16_NEXT(errorUChars, i, errorLength, c);
    997                         if (c >= 0x100000) {
    998                             str.append(nibbleToHex((uint8_t)(c >> 20)));
    999                         }
   1000                         if (c >= 0x10000) {
   1001                             str.append(nibbleToHex((uint8_t)(c >> 16)));
   1002                         }
   1003                         str.append(nibbleToHex((uint8_t)(c >> 12)));
   1004                         str.append(nibbleToHex((uint8_t)(c >> 8)));
   1005                         str.append(nibbleToHex((uint8_t)(c >> 4)));
   1006                         str.append(nibbleToHex((uint8_t)c));
   1007                     }
   1008 
   1009                     initMsg(pname);
   1010                     u_wmsg(stderr, errtag,
   1011                             UnicodeString(pos, length, "").getTerminatedBuffer(),
   1012                             str.getTerminatedBuffer(),
   1013                            u_wmsg_errorName(err));
   1014                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
   1015 
   1016                     willexit = TRUE;
   1017                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
   1018                 }
   1019 
   1020                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
   1021                 // looping until they are; message key "premEnd" now obsolete.
   1022 
   1023                 // Finally, write the converted buffer to the output file
   1024                 size_t outlen = (size_t) (bufp - outbuf);
   1025                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
   1026                 if (wr != outlen) {
   1027                     UnicodeString str(strerror(errno));
   1028                     initMsg(pname);
   1029                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
   1030                     willexit = TRUE;
   1031                 }
   1032 
   1033                 if (willexit) {
   1034                     goto error_exit;
   1035                 }
   1036             } while (!toSawEndOfUnicode);
   1037         } while (!fromSawEndOfBytes);
   1038     } while (!flush);           // Stop when we have flushed the
   1039                                 // converters (this means that it's
   1040                                 // the end of output)
   1041 
   1042     goto normal_exit;
   1043 
   1044 error_exit:
   1045     ret = FALSE;
   1046 
   1047 normal_exit:
   1048     // Cleanup.
   1049 
   1050     ucnv_close(convfrom);
   1051     ucnv_close(convto);
   1052 
   1053 #if !UCONFIG_NO_TRANSLITERATION
   1054     delete t;
   1055 #endif
   1056 
   1057     if (closeFile) {
   1058         fclose(infile);
   1059     }
   1060 
   1061     return ret;
   1062 }
   1063 
   1064 static void usage(const char *pname, int ecode) {
   1065     const UChar *msg;
   1066     int32_t msgLen;
   1067     UErrorCode err = U_ZERO_ERROR;
   1068     FILE *fp = ecode ? stderr : stdout;
   1069     int res;
   1070 
   1071     initMsg(pname);
   1072     msg =
   1073         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
   1074                             &msgLen, &err);
   1075     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
   1076     UnicodeString mname(msg, msgLen + 1);
   1077 
   1078     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
   1079     if (!ecode) {
   1080         if (!res) {
   1081             fputc('\n', fp);
   1082         }
   1083         if (!u_wmsg(fp, "help")) {
   1084             /* Now dump callbacks and finish. */
   1085 
   1086             int i, count =
   1087                 UPRV_LENGTHOF(transcode_callbacks);
   1088             for (i = 0; i < count; ++i) {
   1089                 fprintf(fp, " %s", transcode_callbacks[i].name);
   1090             }
   1091             fputc('\n', fp);
   1092         }
   1093     }
   1094 
   1095     exit(ecode);
   1096 }
   1097 
   1098 extern int
   1099 main(int argc, char **argv)
   1100 {
   1101     FILE *outfile;
   1102     int ret = 0;
   1103 
   1104     size_t bufsz = DEFAULT_BUFSZ;
   1105 
   1106     const char *fromcpage = 0;
   1107     const char *tocpage = 0;
   1108     const char *translit = 0;
   1109     const char *outfilestr = 0;
   1110     UBool fallback = FALSE;
   1111 
   1112     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
   1113     const void *fromuctxt = 0;
   1114     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
   1115     const void *touctxt = 0;
   1116 
   1117     char **iter, **remainArgv, **remainArgvLimit;
   1118     char **end = argv + argc;
   1119 
   1120     const char *pname;
   1121 
   1122     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
   1123     const char *printName = 0;
   1124 
   1125     UBool verbose = FALSE;
   1126     UErrorCode status = U_ZERO_ERROR;
   1127 
   1128     ConvertFile cf;
   1129 
   1130     /* Initialize ICU */
   1131     u_init(&status);
   1132     if (U_FAILURE(status)) {
   1133         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
   1134             argv[0], u_errorName(status));
   1135         exit(1);
   1136     }
   1137 
   1138     // Get and prettify pname.
   1139     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
   1140 #if U_PLATFORM_USES_ONLY_WIN32_API
   1141     if (!pname) {
   1142         pname = uprv_strrchr(*argv, '/');
   1143     }
   1144 #endif
   1145     if (!pname) {
   1146         pname = *argv;
   1147     } else {
   1148         ++pname;
   1149     }
   1150 
   1151     // First, get the arguments from command-line
   1152     // to know the codepages to convert between
   1153 
   1154     remainArgv = remainArgvLimit = argv + 1;
   1155     for (iter = argv + 1; iter != end; iter++) {
   1156         // Check for from charset
   1157         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
   1158             iter++;
   1159             if (iter != end)
   1160                 fromcpage = *iter;
   1161             else
   1162                 usage(pname, 1);
   1163         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
   1164             iter++;
   1165             if (iter != end)
   1166                 tocpage = *iter;
   1167             else
   1168                 usage(pname, 1);
   1169         } else if (strcmp("-x", *iter) == 0) {
   1170             iter++;
   1171             if (iter != end)
   1172                 translit = *iter;
   1173             else
   1174                 usage(pname, 1);
   1175         } else if (!strcmp("--fallback", *iter)) {
   1176             fallback = TRUE;
   1177         } else if (!strcmp("--no-fallback", *iter)) {
   1178             fallback = FALSE;
   1179         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
   1180             iter++;
   1181             if (iter != end) {
   1182                 bufsz = atoi(*iter);
   1183                 if ((int) bufsz <= 0) {
   1184                     initMsg(pname);
   1185                     UnicodeString str(*iter);
   1186                     initMsg(pname);
   1187                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
   1188                     return 3;
   1189                 }
   1190             } else {
   1191                 usage(pname, 1);
   1192             }
   1193         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
   1194             if (printTranslits) {
   1195                 usage(pname, 1);
   1196             }
   1197             printConvs = TRUE;
   1198         } else if (strcmp("--default-code", *iter) == 0) {
   1199             if (printTranslits) {
   1200                 usage(pname, 1);
   1201             }
   1202             printName = ucnv_getDefaultName();
   1203         } else if (strcmp("--list-code", *iter) == 0) {
   1204             if (printTranslits) {
   1205                 usage(pname, 1);
   1206             }
   1207 
   1208             iter++;
   1209             if (iter != end) {
   1210                 UErrorCode e = U_ZERO_ERROR;
   1211                 printName = ucnv_getAlias(*iter, 0, &e);
   1212                 if (U_FAILURE(e) || !printName) {
   1213                     UnicodeString str(*iter);
   1214                     initMsg(pname);
   1215                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
   1216                     return 2;
   1217                 }
   1218             } else
   1219                 usage(pname, 1);
   1220         } else if (strcmp("--canon", *iter) == 0) {
   1221             printCanon = TRUE;
   1222         } else if (strcmp("-L", *iter) == 0
   1223             || !strcmp("--list-transliterators", *iter)) {
   1224             if (printConvs) {
   1225                 usage(pname, 1);
   1226             }
   1227             printTranslits = TRUE;
   1228         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
   1229             || !strcmp("--help", *iter)) {
   1230             usage(pname, 0);
   1231         } else if (!strcmp("-c", *iter)) {
   1232             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
   1233         } else if (!strcmp("--to-callback", *iter)) {
   1234             iter++;
   1235             if (iter != end) {
   1236                 const struct callback_ent *cbe = findCallback(*iter);
   1237                 if (cbe) {
   1238                     fromucallback = cbe->fromu;
   1239                     fromuctxt = cbe->fromuctxt;
   1240                 } else {
   1241                     UnicodeString str(*iter);
   1242                     initMsg(pname);
   1243                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1244                     return 4;
   1245                 }
   1246             } else {
   1247                 usage(pname, 1);
   1248             }
   1249         } else if (!strcmp("--from-callback", *iter)) {
   1250             iter++;
   1251             if (iter != end) {
   1252                 const struct callback_ent *cbe = findCallback(*iter);
   1253                 if (cbe) {
   1254                     toucallback = cbe->tou;
   1255                     touctxt = cbe->touctxt;
   1256                 } else {
   1257                     UnicodeString str(*iter);
   1258                     initMsg(pname);
   1259                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1260                     return 4;
   1261                 }
   1262             } else {
   1263                 usage(pname, 1);
   1264             }
   1265         } else if (!strcmp("-i", *iter)) {
   1266             toucallback = UCNV_TO_U_CALLBACK_SKIP;
   1267         } else if (!strcmp("--callback", *iter)) {
   1268             iter++;
   1269             if (iter != end) {
   1270                 const struct callback_ent *cbe = findCallback(*iter);
   1271                 if (cbe) {
   1272                     fromucallback = cbe->fromu;
   1273                     fromuctxt = cbe->fromuctxt;
   1274                     toucallback = cbe->tou;
   1275                     touctxt = cbe->touctxt;
   1276                 } else {
   1277                     UnicodeString str(*iter);
   1278                     initMsg(pname);
   1279                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1280                     return 4;
   1281                 }
   1282             } else {
   1283                 usage(pname, 1);
   1284             }
   1285         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
   1286             verbose = FALSE;
   1287         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
   1288             verbose = TRUE;
   1289         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
   1290             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
   1291             return 0;
   1292         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
   1293             ++iter;
   1294             if (iter != end && !outfilestr) {
   1295                 outfilestr = *iter;
   1296             } else {
   1297                 usage(pname, 1);
   1298             }
   1299         } else if (0 == strcmp("--add-signature", *iter)) {
   1300             cf.signature = 1;
   1301         } else if (0 == strcmp("--remove-signature", *iter)) {
   1302             cf.signature = -1;
   1303         } else if (**iter == '-' && (*iter)[1]) {
   1304             usage(pname, 1);
   1305         } else {
   1306             // move a non-option up in argv[]
   1307             *remainArgvLimit++ = *iter;
   1308         }
   1309     }
   1310 
   1311     if (printConvs || printName) {
   1312         return printConverters(pname, printName, printCanon) ? 2 : 0;
   1313     } else if (printTranslits) {
   1314         return printTransliterators(printCanon) ? 3 : 0;
   1315     }
   1316 
   1317     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
   1318         fromcpage = ucnv_getDefaultName();
   1319     }
   1320     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
   1321         tocpage = ucnv_getDefaultName();
   1322     }
   1323 
   1324     // Open the correct output file or connect to stdout for reading input
   1325     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
   1326         outfile = fopen(outfilestr, "wb");
   1327         if (outfile == 0) {
   1328             UnicodeString str1(outfilestr, "");
   1329             UnicodeString str2(strerror(errno), "");
   1330             initMsg(pname);
   1331             u_wmsg(stderr, "cantCreateOutputF",
   1332                 str1.getBuffer(), str2.getBuffer());
   1333             return 1;
   1334         }
   1335     } else {
   1336         outfilestr = "-";
   1337         outfile = stdout;
   1338 #ifdef USE_FILENO_BINARY_MODE
   1339         if (setmode(fileno(outfile), O_BINARY) == -1) {
   1340             u_wmsg(stderr, "cantSetOutBinMode");
   1341             exit(-1);
   1342         }
   1343 #endif
   1344     }
   1345 
   1346     /* Loop again on the arguments to find all the input files, and
   1347     convert them. */
   1348 
   1349     cf.setBufferSize(bufsz);
   1350 
   1351     if(remainArgv < remainArgvLimit) {
   1352         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
   1353             if (!cf.convertFile(
   1354                     pname, fromcpage, toucallback, touctxt, tocpage,
   1355                     fromucallback, fromuctxt, fallback, translit, *iter,
   1356                     outfile, verbose)
   1357             ) {
   1358                 goto error_exit;
   1359             }
   1360         }
   1361     } else {
   1362         if (!cf.convertFile(
   1363                 pname, fromcpage, toucallback, touctxt, tocpage,
   1364                 fromucallback, fromuctxt, fallback, translit, 0,
   1365                 outfile, verbose)
   1366         ) {
   1367             goto error_exit;
   1368         }
   1369     }
   1370 
   1371     goto normal_exit;
   1372 error_exit:
   1373 #if !UCONFIG_NO_LEGACY_CONVERSION
   1374     ret = 1;
   1375 #else
   1376     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
   1377 #endif
   1378 normal_exit:
   1379 
   1380     if (outfile != stdout) {
   1381         fclose(outfile);
   1382     }
   1383 
   1384     u_cleanup();
   1385 
   1386     return ret;
   1387 }
   1388 
   1389 
   1390 /*
   1391  * Hey, Emacs, please set the following:
   1392  *
   1393  * Local Variables:
   1394  * indent-tabs-mode: nil
   1395  * End:
   1396  *
   1397  */
   1398