Home | History | Annotate | Download | only in uconv
      1 /*****************************************************************************
      2 *
      3 *   Copyright (C) 1999-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *
      6 ******************************************************************************/
      7 
      8 /*
      9  * uconv(1): an iconv(1)-like converter using ICU.
     10  *
     11  * Original code by Jonas Utterström <jonas.utterstrom (at) vittran.norrnod.se>
     12  * contributed in 1999.
     13  *
     14  * Conversion to the C conversion API and many improvements by
     15  * Yves Arrouye <yves (at) realnames.com>, current maintainer.
     16  *
     17  * Markus Scherer maintainer from 2003.
     18  * See source code repository history for changes.
     19  */
     20 
     21 #include <unicode/utypes.h>
     22 #include <unicode/putil.h>
     23 #include <unicode/ucnv.h>
     24 #include <unicode/uenum.h>
     25 #include <unicode/unistr.h>
     26 #include <unicode/translit.h>
     27 #include <unicode/uset.h>
     28 #include <unicode/uclean.h>
     29 #include <unicode/utf16.h>
     30 
     31 #include <stdio.h>
     32 #include <errno.h>
     33 #include <string.h>
     34 #include <stdlib.h>
     35 
     36 #include "cmemory.h"
     37 #include "cstring.h"
     38 #include "ustrfmt.h"
     39 
     40 #include "unicode/uwmsg.h"
     41 
     42 U_NAMESPACE_USE
     43 
     44 #if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
     45 #include <io.h>
     46 #include <fcntl.h>
     47 #if U_PLATFORM_USES_ONLY_WIN32_API
     48 #define USE_FILENO_BINARY_MODE 1
     49 /* Windows likes to rename Unix-like functions */
     50 #ifndef fileno
     51 #define fileno _fileno
     52 #endif
     53 #ifndef setmode
     54 #define setmode _setmode
     55 #endif
     56 #ifndef O_BINARY
     57 #define O_BINARY _O_BINARY
     58 #endif
     59 #endif
     60 #endif
     61 
     62 #ifdef UCONVMSG_LINK
     63 /* below from the README */
     64 #include "unicode/utypes.h"
     65 #include "unicode/udata.h"
     66 U_CFUNC char uconvmsg_dat[];
     67 #endif
     68 
     69 #define DEFAULT_BUFSZ   4096
     70 #define UCONVMSG "uconvmsg"
     71 
     72 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
     73 
     74 /*
     75  * Initialize the message bundle so that message strings can be fetched
     76  * by u_wmsg().
     77  *
     78  */
     79 
     80 static void initMsg(const char *pname) {
     81     static int ps = 0;
     82 
     83     if (!ps) {
     84         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
     85         UErrorCode err = U_ZERO_ERROR;
     86 
     87         ps = 1;
     88 
     89         /* Set up our static data - if any */
     90 #if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */
     91         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
     92         if (U_FAILURE(err)) {
     93           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
     94                   pname, u_errorName(err));
     95           err = U_ZERO_ERROR; /* It may still fail */
     96         }
     97 #endif
     98 
     99         /* Get messages. */
    100         gBundle = u_wmsg_setPath(UCONVMSG, &err);
    101         if (U_FAILURE(err)) {
    102             fprintf(stderr,
    103                     "%s: warning: couldn't open bundle %s: %s\n",
    104                     pname, UCONVMSG, u_errorName(err));
    105 #ifdef UCONVMSG_LINK
    106             fprintf(stderr,
    107                     "%s: setAppData was called, internal data %s failed to load\n",
    108                         pname, UCONVMSG);
    109 #endif
    110 
    111             err = U_ZERO_ERROR;
    112             /* that was try #1, try again with a path */
    113             uprv_strcpy(dataPath, u_getDataDirectory());
    114             uprv_strcat(dataPath, U_FILE_SEP_STRING);
    115             uprv_strcat(dataPath, UCONVMSG);
    116 
    117             gBundle = u_wmsg_setPath(dataPath, &err);
    118             if (U_FAILURE(err)) {
    119                 fprintf(stderr,
    120                     "%s: warning: still couldn't open bundle %s: %s\n",
    121                     pname, dataPath, u_errorName(err));
    122                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
    123             }
    124         }
    125     }
    126 }
    127 
    128 /* Mapping of callback names to the callbacks passed to the converter
    129    API. */
    130 
    131 static struct callback_ent {
    132     const char *name;
    133     UConverterFromUCallback fromu;
    134     const void *fromuctxt;
    135     UConverterToUCallback tou;
    136     const void *touctxt;
    137 } transcode_callbacks[] = {
    138     { "substitute",
    139       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
    140       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
    141     { "skip",
    142       UCNV_FROM_U_CALLBACK_SKIP, 0,
    143       UCNV_TO_U_CALLBACK_SKIP, 0 },
    144     { "stop",
    145       UCNV_FROM_U_CALLBACK_STOP, 0,
    146       UCNV_TO_U_CALLBACK_STOP, 0 },
    147     { "escape",
    148       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
    149       UCNV_TO_U_CALLBACK_ESCAPE, 0},
    150     { "escape-icu",
    151       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
    152       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
    153     { "escape-java",
    154       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
    155       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
    156     { "escape-c",
    157       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
    158       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
    159     { "escape-xml",
    160       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    161       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    162     { "escape-xml-hex",
    163       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    164       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    165     { "escape-xml-dec",
    166       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
    167       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
    168     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
    169       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
    170 };
    171 
    172 /* Return a pointer to a callback record given its name. */
    173 
    174 static const struct callback_ent *findCallback(const char *name) {
    175     int i, count =
    176         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
    177 
    178     /* We'll do a linear search, there aren't many of them and bsearch()
    179        may not be that portable. */
    180 
    181     for (i = 0; i < count; ++i) {
    182         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
    183             return &transcode_callbacks[i];
    184         }
    185     }
    186 
    187     return 0;
    188 }
    189 
    190 /* Print converter information. If lookfor is set, only that converter will
    191    be printed, otherwise all converters will be printed. If canon is non
    192    zero, tags and aliases for each converter are printed too, in the format
    193    expected for convrters.txt(5). */
    194 
    195 static int printConverters(const char *pname, const char *lookfor,
    196     UBool canon)
    197 {
    198     UErrorCode err = U_ZERO_ERROR;
    199     int32_t num;
    200     uint16_t num_stds;
    201     const char **stds;
    202 
    203     /* If there is a specified name, just handle that now. */
    204 
    205     if (lookfor) {
    206         if (!canon) {
    207             printf("%s\n", lookfor);
    208             return 0;
    209         } else {
    210         /*  Because we are printing a canonical name, we need the
    211             true converter name. We've done that already except for
    212             the default name (because we want to print the exact
    213             name one would get when calling ucnv_getDefaultName()
    214             in non-canon mode). But since we do not know at this
    215             point if we have the default name or something else, we
    216             need to normalize again to the canonical converter
    217             name. */
    218 
    219             const char *truename = ucnv_getAlias(lookfor, 0, &err);
    220             if (U_SUCCESS(err)) {
    221                 lookfor = truename;
    222             } else {
    223                 err = U_ZERO_ERROR;
    224             }
    225         }
    226     }
    227 
    228     /* Print converter names. We come here for one of two reasons: we
    229        are printing all the names (lookfor was null), or we have a
    230        single converter to print but in canon mode, hence we need to
    231        get to it in order to print everything. */
    232 
    233     num = ucnv_countAvailable();
    234     if (num <= 0) {
    235         initMsg(pname);
    236         u_wmsg(stderr, "cantGetNames");
    237         return -1;
    238     }
    239     if (lookfor) {
    240         num = 1;                /* We know where we want to be. */
    241     }
    242 
    243     num_stds = ucnv_countStandards();
    244     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    245     if (!stds) {
    246         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
    247         return -1;
    248     } else {
    249         uint16_t s;
    250 
    251         if (canon) {
    252             printf("{ ");
    253         }
    254         for (s = 0; s < num_stds; ++s) {
    255             stds[s] = ucnv_getStandard(s, &err);
    256             if (canon) {
    257                 printf("%s ", stds[s]);
    258             }
    259             if (U_FAILURE(err)) {
    260                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
    261                 goto error_cleanup;
    262             }
    263         }
    264         if (canon) {
    265             puts("}");
    266         }
    267     }
    268 
    269     for (int32_t i = 0; i < num; i++) {
    270         const char *name;
    271         uint16_t num_aliases;
    272 
    273         /* Set the name either to what we are looking for, or
    274         to the current converter name. */
    275 
    276         if (lookfor) {
    277             name = lookfor;
    278         } else {
    279             name = ucnv_getAvailableName(i);
    280         }
    281 
    282         /* Get all the aliases associated to the name. */
    283 
    284         err = U_ZERO_ERROR;
    285         num_aliases = ucnv_countAliases(name, &err);
    286         if (U_FAILURE(err)) {
    287             printf("%s", name);
    288 
    289             UnicodeString str(name, "");
    290             putchar('\t');
    291             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    292                 u_wmsg_errorName(err));
    293             goto error_cleanup;
    294         } else {
    295             uint16_t a, s, t;
    296 
    297             /* Write all the aliases and their tags. */
    298 
    299             for (a = 0; a < num_aliases; ++a) {
    300                 const char *alias = ucnv_getAlias(name, a, &err);
    301 
    302                 if (U_FAILURE(err)) {
    303                     UnicodeString str(name, "");
    304                     putchar('\t');
    305                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    306                         u_wmsg_errorName(err));
    307                     goto error_cleanup;
    308                 }
    309 
    310                 /* Print the current alias so that it looks right. */
    311                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
    312                                  alias,
    313                                  (canon ? "" : " "));
    314 
    315                 /* Look (slowly, linear searching) for a tag. */
    316 
    317                 if (canon) {
    318                     /* -1 to skip the last standard */
    319                     for (s = t = 0; s < num_stds-1; ++s) {
    320                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
    321                         if (U_SUCCESS(err)) {
    322                             /* List the standard tags */
    323                             const char *standardName;
    324                             UBool isFirst = TRUE;
    325                             UErrorCode enumError = U_ZERO_ERROR;
    326                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
    327                                 /* See if this alias is supported by this standard. */
    328                                 if (!strcmp(standardName, alias)) {
    329                                     if (!t) {
    330                                         printf(" {");
    331                                         t = 1;
    332                                     }
    333                                     /* Print a * after the default standard name */
    334                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
    335                                 }
    336                                 isFirst = FALSE;
    337                             }
    338                         }
    339                     }
    340                     if (t) {
    341                         printf(" }");
    342                     }
    343                 }
    344                 /* Terminate this entry. */
    345                 if (canon) {
    346                     puts("");
    347                 }
    348 
    349                 /* Move on. */
    350             }
    351             /* Terminate this entry. */
    352             if (!canon) {
    353                 puts("");
    354             }
    355         }
    356     }
    357 
    358     /* Free temporary data. */
    359 
    360     uprv_free(stds);
    361 
    362     /* Success. */
    363 
    364     return 0;
    365 error_cleanup:
    366     uprv_free(stds);
    367     return -1;
    368 }
    369 
    370 /* Print all available transliterators. If canon is non zero, print
    371    one transliterator per line. */
    372 
    373 static int printTransliterators(UBool canon)
    374 {
    375 #if UCONFIG_NO_TRANSLITERATION
    376     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    377     return 1;
    378 #else
    379     UErrorCode status = U_ZERO_ERROR;
    380     UEnumeration *ids = utrans_openIDs(&status);
    381     int32_t i, numtrans = uenum_count(ids, &status);
    382 
    383     char sepchar = canon ? '\n' : ' ';
    384 
    385     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
    386     	int32_t len;
    387     	const char *nextTrans = uenum_next(ids, &len, &status);
    388 
    389         printf("%s", nextTrans);
    390         if (i < numtrans - 1) {
    391             putchar(sepchar);
    392         }
    393     }
    394 
    395     uenum_close(ids);
    396 
    397     /* Add a terminating newline if needed. */
    398 
    399     if (sepchar != '\n') {
    400         putchar('\n');
    401     }
    402 
    403     /* Success. */
    404 
    405     return 0;
    406 #endif
    407 }
    408 
    409 enum {
    410     uSP = 0x20,         // space
    411     uCR = 0xd,          // carriage return
    412     uLF = 0xa,          // line feed
    413     uNL = 0x85,         // newline
    414     uLS = 0x2028,       // line separator
    415     uPS = 0x2029,       // paragraph separator
    416     uSig = 0xfeff       // signature/BOM character
    417 };
    418 
    419 static inline int32_t
    420 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
    421     // find one of
    422     // CR, LF, CRLF, NL, LS, PS
    423     // for paragraph ends (see UAX #13/Unicode 4)
    424     // and include it in the chunk
    425     // all of these characters are on the BMP
    426     // do not include FF or VT in case they are part of a paragraph
    427     // (important for bidi contexts)
    428     static const UChar paraEnds[] = {
    429         0xd, 0xa, 0x85, 0x2028, 0x2029
    430     };
    431     enum {
    432         iCR, iLF, iNL, iLS, iPS, iCount
    433     };
    434 
    435     // first, see if there is a CRLF split between prev and s
    436     if (prev.endsWith(paraEnds + iCR, 1)) {
    437         if (s.startsWith(paraEnds + iLF, 1)) {
    438             return 1; // split CRLF, include the LF
    439         } else if (!s.isEmpty()) {
    440             return 0; // complete the last chunk
    441         } else {
    442             return -1; // wait for actual further contents to arrive
    443         }
    444     }
    445 
    446     const UChar *u = s.getBuffer(), *limit = u + s.length();
    447     UChar c;
    448 
    449     while (u < limit) {
    450         c = *u++;
    451         if (
    452             ((c < uSP) && (c == uCR || c == uLF)) ||
    453             (c == uNL) ||
    454             ((c & uLS) == uLS)
    455         ) {
    456             if (c == uCR) {
    457                 // check for CRLF
    458                 if (u == limit) {
    459                     return -1; // LF may be in the next chunk
    460                 } else if (*u == uLF) {
    461                     ++u; // include the LF in this chunk
    462                 }
    463             }
    464             return (int32_t)(u - s.getBuffer());
    465         }
    466     }
    467 
    468     return -1; // continue collecting the chunk
    469 }
    470 
    471 enum {
    472     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
    473     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
    474     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
    475 };
    476 
    477 static inline UChar
    478 nibbleToHex(uint8_t n) {
    479     n &= 0xf;
    480     return
    481         n <= 9 ?
    482             (UChar)(0x30 + n) :
    483             (UChar)((0x61 - 10) + n);
    484 }
    485 
    486 // check the converter's Unicode signature properties;
    487 // the fromUnicode side of the converter must be in its initial state
    488 // and will be reset again if it was used
    489 static int32_t
    490 cnvSigType(UConverter *cnv) {
    491     UErrorCode err;
    492     int32_t result;
    493 
    494     // test if the output charset can convert U+FEFF
    495     USet *set = uset_open(1, 0);
    496     err = U_ZERO_ERROR;
    497     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
    498     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
    499         result = CNV_WITH_FEFF;
    500     } else {
    501         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
    502     }
    503     uset_close(set);
    504 
    505     if (result == CNV_WITH_FEFF) {
    506         // test if the output charset emits a signature anyway
    507         const UChar a[1] = { 0x61 }; // "a"
    508         const UChar *in;
    509 
    510         char buffer[20];
    511         char *out;
    512 
    513         in = a;
    514         out = buffer;
    515         err = U_ZERO_ERROR;
    516         ucnv_fromUnicode(cnv,
    517             &out, buffer + sizeof(buffer),
    518             &in, a + 1,
    519             NULL, TRUE, &err);
    520         ucnv_resetFromUnicode(cnv);
    521 
    522         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
    523             U_SUCCESS(err)
    524         ) {
    525             result = CNV_ADDS_FEFF;
    526         }
    527     }
    528 
    529     return result;
    530 }
    531 
    532 class ConvertFile {
    533 public:
    534     ConvertFile() :
    535         buf(NULL), outbuf(NULL), fromoffsets(NULL),
    536         bufsz(0), signature(0) {}
    537 
    538     void
    539     setBufferSize(size_t bufferSize) {
    540         bufsz = bufferSize;
    541 
    542         buf = new char[2 * bufsz];
    543         outbuf = buf + bufsz;
    544 
    545         // +1 for an added U+FEFF in the intermediate Unicode buffer
    546         fromoffsets = new int32_t[bufsz + 1];
    547     }
    548 
    549     ~ConvertFile() {
    550         delete [] buf;
    551         delete [] fromoffsets;
    552     }
    553 
    554     UBool convertFile(const char *pname,
    555                       const char *fromcpage,
    556                       UConverterToUCallback toucallback,
    557                       const void *touctxt,
    558                       const char *tocpage,
    559                       UConverterFromUCallback fromucallback,
    560                       const void *fromuctxt,
    561                       UBool fallback,
    562                       const char *translit,
    563                       const char *infilestr,
    564                       FILE * outfile, int verbose);
    565 private:
    566     friend int main(int argc, char **argv);
    567 
    568     char *buf, *outbuf;
    569     int32_t *fromoffsets;
    570 
    571     size_t bufsz;
    572     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
    573 };
    574 
    575 // Convert a file from one encoding to another
    576 UBool
    577 ConvertFile::convertFile(const char *pname,
    578                          const char *fromcpage,
    579                          UConverterToUCallback toucallback,
    580                          const void *touctxt,
    581                          const char *tocpage,
    582                          UConverterFromUCallback fromucallback,
    583                          const void *fromuctxt,
    584                          UBool fallback,
    585                          const char *translit,
    586                          const char *infilestr,
    587                          FILE * outfile, int verbose)
    588 {
    589     FILE *infile;
    590     UBool ret = TRUE;
    591     UConverter *convfrom = 0;
    592     UConverter *convto = 0;
    593     UErrorCode err = U_ZERO_ERROR;
    594     UBool flush;
    595     UBool closeFile = FALSE;
    596     const char *cbufp, *prevbufp;
    597     char *bufp;
    598 
    599     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
    600 
    601     const UChar *unibuf, *unibufbp;
    602     UChar *unibufp;
    603 
    604     size_t rd, wr;
    605 
    606 #if !UCONFIG_NO_TRANSLITERATION
    607     Transliterator *t = 0;      // Transliterator acting on Unicode data.
    608     UnicodeString chunk;        // One chunk of the text being collected for transformation.
    609 #endif
    610     UnicodeString u;            // String to do the transliteration.
    611     int32_t ulen;
    612 
    613     // use conversion offsets for error messages
    614     // unless a transliterator is used -
    615     // a text transformation will reorder characters in unpredictable ways
    616     UBool useOffsets = TRUE;
    617 
    618     // Open the correct input file or connect to stdin for reading input
    619 
    620     if (infilestr != 0 && strcmp(infilestr, "-")) {
    621         infile = fopen(infilestr, "rb");
    622         if (infile == 0) {
    623             UnicodeString str1(infilestr, "");
    624             str1.append((UChar32) 0);
    625             UnicodeString str2(strerror(errno), "");
    626             str2.append((UChar32) 0);
    627             initMsg(pname);
    628             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
    629             return FALSE;
    630         }
    631         closeFile = TRUE;
    632     } else {
    633         infilestr = "-";
    634         infile = stdin;
    635 #ifdef USE_FILENO_BINARY_MODE
    636         if (setmode(fileno(stdin), O_BINARY) == -1) {
    637             initMsg(pname);
    638             u_wmsg(stderr, "cantSetInBinMode");
    639             return FALSE;
    640         }
    641 #endif
    642     }
    643 
    644     if (verbose) {
    645         fprintf(stderr, "%s:\n", infilestr);
    646     }
    647 
    648 #if !UCONFIG_NO_TRANSLITERATION
    649     // Create transliterator as needed.
    650 
    651     if (translit != NULL && *translit) {
    652         UParseError parse;
    653         UnicodeString str(translit), pestr;
    654 
    655         /* Create from rules or by ID as needed. */
    656 
    657         parse.line = -1;
    658 
    659         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
    660             t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err);
    661         } else {
    662             t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err);
    663         }
    664 
    665         if (U_FAILURE(err)) {
    666             str.append((UChar32) 0);
    667             initMsg(pname);
    668 
    669             if (parse.line >= 0) {
    670                 UChar linebuf[20], offsetbuf[20];
    671                 uprv_itou(linebuf, 20, parse.line, 10, 0);
    672                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
    673                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
    674                     u_wmsg_errorName(err), linebuf, offsetbuf);
    675             } else {
    676                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
    677                     u_wmsg_errorName(err));
    678             }
    679 
    680             if (t) {
    681                 delete t;
    682                 t = 0;
    683             }
    684             goto error_exit;
    685         }
    686 
    687         useOffsets = FALSE;
    688     }
    689 #endif
    690 
    691     // Create codepage converter. If the codepage or its aliases weren't
    692     // available, it returns NULL and a failure code. We also set the
    693     // callbacks, and return errors in the same way.
    694 
    695     convfrom = ucnv_open(fromcpage, &err);
    696     if (U_FAILURE(err)) {
    697         UnicodeString str(fromcpage, "");
    698         initMsg(pname);
    699         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
    700             u_wmsg_errorName(err));
    701         goto error_exit;
    702     }
    703     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    704     if (U_FAILURE(err)) {
    705         initMsg(pname);
    706         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    707         goto error_exit;
    708     }
    709 
    710     convto = ucnv_open(tocpage, &err);
    711     if (U_FAILURE(err)) {
    712         UnicodeString str(tocpage, "");
    713         initMsg(pname);
    714         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
    715             u_wmsg_errorName(err));
    716         goto error_exit;
    717     }
    718     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    719     if (U_FAILURE(err)) {
    720         initMsg(pname);
    721         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    722         goto error_exit;
    723     }
    724     ucnv_setFallback(convto, fallback);
    725 
    726     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
    727     int8_t sig;
    728 
    729     // OK, we can convert now.
    730     sig = signature;
    731     rd = 0;
    732 
    733     do {
    734         willexit = FALSE;
    735 
    736         // input file offset at the beginning of the next buffer
    737         infoffset += rd;
    738 
    739         rd = fread(buf, 1, bufsz, infile);
    740         if (ferror(infile) != 0) {
    741             UnicodeString str(strerror(errno));
    742             initMsg(pname);
    743             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
    744             goto error_exit;
    745         }
    746 
    747         // Convert the read buffer into the new encoding via Unicode.
    748         // After the call 'unibufp' will be placed behind the last
    749         // character that was converted in the 'unibuf'.
    750         // Also the 'cbufp' is positioned behind the last converted
    751         // character.
    752         // At the last conversion in the file, flush should be set to
    753         // true so that we get all characters converted.
    754         //
    755         // The converter must be flushed at the end of conversion so
    756         // that characters on hold also will be written.
    757 
    758         cbufp = buf;
    759         flush = (UBool)(rd != bufsz);
    760 
    761         // convert until the input is consumed
    762         do {
    763             // remember the start of the current byte-to-Unicode conversion
    764             prevbufp = cbufp;
    765 
    766             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
    767 
    768             // Use bufsz instead of u.getCapacity() for the targetLimit
    769             // so that we don't overflow fromoffsets[].
    770             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
    771                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
    772 
    773             ulen = (int32_t)(unibufp - unibuf);
    774             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
    775 
    776             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
    777             // converting all of the input bytes.
    778             // It works like this because ucnv_toUnicode() returns only under the
    779             // following conditions:
    780             // - an error occurred during conversion (an error code is set)
    781             // - the target buffer is filled (the error code indicates an overflow)
    782             // - the source is consumed
    783             // That is, if the error code does not indicate a failure,
    784             // not even an overflow, then the source must be consumed entirely.
    785             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
    786 
    787             if (err == U_BUFFER_OVERFLOW_ERROR) {
    788                 err = U_ZERO_ERROR;
    789             } else if (U_FAILURE(err)) {
    790                 char pos[32], errorBytes[32];
    791                 int8_t i, length, errorLength;
    792 
    793                 UErrorCode localError = U_ZERO_ERROR;
    794                 errorLength = (int8_t)sizeof(errorBytes);
    795                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
    796                 if (U_FAILURE(localError) || errorLength == 0) {
    797                     errorLength = 1;
    798                 }
    799 
    800                 // print the input file offset of the start of the error bytes:
    801                 // input file offset of the current byte buffer +
    802                 // length of the just consumed bytes -
    803                 // length of the error bytes
    804                 length =
    805                     (int8_t)sprintf(pos, "%d",
    806                         (int)(infoffset + (cbufp - buf) - errorLength));
    807 
    808                 // output the bytes that caused the error
    809                 UnicodeString str;
    810                 for (i = 0; i < errorLength; ++i) {
    811                     if (i > 0) {
    812                         str.append((UChar)uSP);
    813                     }
    814                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
    815                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
    816                 }
    817 
    818                 initMsg(pname);
    819                 u_wmsg(stderr, "problemCvtToU",
    820                         UnicodeString(pos, length, "").getTerminatedBuffer(),
    821                         str.getTerminatedBuffer(),
    822                         u_wmsg_errorName(err));
    823 
    824                 willexit = TRUE;
    825                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
    826             }
    827 
    828             // Replaced a check for whether the input was consumed by
    829             // looping until it is; message key "premEndInput" now obsolete.
    830 
    831             if (ulen == 0) {
    832                 continue;
    833             }
    834 
    835             // remove a U+FEFF Unicode signature character if requested
    836             if (sig < 0) {
    837                 if (u.charAt(0) == uSig) {
    838                     u.remove(0, 1);
    839 
    840                     // account for the removed UChar and offset
    841                     --ulen;
    842 
    843                     if (useOffsets) {
    844                         // remove an offset from fromoffsets[] as well
    845                         // to keep the array parallel with the UChars
    846                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
    847                     }
    848 
    849                 }
    850                 sig = 0;
    851             }
    852 
    853 #if !UCONFIG_NO_TRANSLITERATION
    854             // Transliterate/transform if needed.
    855 
    856             // For transformation, we use chunking code -
    857             // collect Unicode input until, for example, an end-of-line,
    858             // then transform and output-convert that and continue collecting.
    859             // This makes the transformation result independent of the buffer size
    860             // while avoiding the slower keyboard mode.
    861             // The end-of-chunk characters are completely included in the
    862             // transformed string in case they are to be transformed themselves.
    863             if (t != NULL) {
    864                 UnicodeString out;
    865                 int32_t chunkLimit;
    866 
    867                 do {
    868                     chunkLimit = getChunkLimit(chunk, u);
    869                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
    870                         // use all of the rest at the end of the text
    871                         chunkLimit = u.length();
    872                     }
    873                     if (chunkLimit >= 0) {
    874                         // complete the chunk and transform it
    875                         chunk.append(u, 0, chunkLimit);
    876                         u.remove(0, chunkLimit);
    877                         t->transliterate(chunk);
    878 
    879                         // append the transformation result to the result and empty the chunk
    880                         out.append(chunk);
    881                         chunk.remove();
    882                     } else {
    883                         // continue collecting the chunk
    884                         chunk.append(u);
    885                         break;
    886                     }
    887                 } while (!u.isEmpty());
    888 
    889                 u = out;
    890                 ulen = u.length();
    891             }
    892 #endif
    893 
    894             // add a U+FEFF Unicode signature character if requested
    895             // and possible/necessary
    896             if (sig > 0) {
    897                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
    898                     u.insert(0, (UChar)uSig);
    899 
    900                     if (useOffsets) {
    901                         // insert a pseudo-offset into fromoffsets[] as well
    902                         // to keep the array parallel with the UChars
    903                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
    904                         fromoffsets[0] = -1;
    905                     }
    906 
    907                     // account for the additional UChar and offset
    908                     ++ulen;
    909                 }
    910                 sig = 0;
    911             }
    912 
    913             // Convert the Unicode buffer into the destination codepage
    914             // Again 'bufp' will be placed behind the last converted character
    915             // And 'unibufp' will be placed behind the last converted unicode character
    916             // At the last conversion flush should be set to true to ensure that
    917             // all characters left get converted
    918 
    919             unibuf = unibufbp = u.getBuffer();
    920 
    921             do {
    922                 bufp = outbuf;
    923 
    924                 // Use fromSawEndOfBytes in addition to the flush flag -
    925                 // it indicates whether the intermediate Unicode string
    926                 // contains the very last UChars for the very last input bytes.
    927                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
    928                                  &unibufbp,
    929                                  unibuf + ulen,
    930                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
    931 
    932                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
    933                 // converting all of the intermediate UChars.
    934                 // See comment for fromSawEndOfBytes.
    935                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
    936 
    937                 if (err == U_BUFFER_OVERFLOW_ERROR) {
    938                     err = U_ZERO_ERROR;
    939                 } else if (U_FAILURE(err)) {
    940                     UChar errorUChars[4];
    941                     const char *errtag;
    942                     char pos[32];
    943                     UChar32 c;
    944                     int8_t i, length, errorLength;
    945 
    946                     UErrorCode localError = U_ZERO_ERROR;
    947                     errorLength = (int8_t)UPRV_LENGTHOF(errorUChars);
    948                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
    949                     if (U_FAILURE(localError) || errorLength == 0) {
    950                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
    951                         errorLength = 1;
    952                     }
    953 
    954                     int32_t ferroffset;
    955 
    956                     if (useOffsets) {
    957                         // Unicode buffer offset of the start of the error UChars
    958                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
    959                         if (ferroffset < 0) {
    960                             // approximation - the character started in the previous Unicode buffer
    961                             ferroffset = 0;
    962                         }
    963 
    964                         // get the corresponding byte offset out of fromoffsets[]
    965                         // go back if the offset is not known for some of the UChars
    966                         int32_t fromoffset;
    967                         do {
    968                             fromoffset = fromoffsets[ferroffset];
    969                         } while (fromoffset < 0 && --ferroffset >= 0);
    970 
    971                         // total input file offset =
    972                         // input file offset of the current byte buffer +
    973                         // byte buffer offset of where the current Unicode buffer is converted from +
    974                         // fromoffsets[Unicode offset]
    975                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
    976                         errtag = "problemCvtFromU";
    977                     } else {
    978                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
    979                         // be different from what the offsets refer to.
    980 
    981                         // output file offset
    982                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
    983                         errtag = "problemCvtFromUOut";
    984                     }
    985 
    986                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
    987 
    988                     // output the code points that caused the error
    989                     UnicodeString str;
    990                     for (i = 0; i < errorLength;) {
    991                         if (i > 0) {
    992                             str.append((UChar)uSP);
    993                         }
    994                         U16_NEXT(errorUChars, i, errorLength, c);
    995                         if (c >= 0x100000) {
    996                             str.append(nibbleToHex((uint8_t)(c >> 20)));
    997                         }
    998                         if (c >= 0x10000) {
    999                             str.append(nibbleToHex((uint8_t)(c >> 16)));
   1000                         }
   1001                         str.append(nibbleToHex((uint8_t)(c >> 12)));
   1002                         str.append(nibbleToHex((uint8_t)(c >> 8)));
   1003                         str.append(nibbleToHex((uint8_t)(c >> 4)));
   1004                         str.append(nibbleToHex((uint8_t)c));
   1005                     }
   1006 
   1007                     initMsg(pname);
   1008                     u_wmsg(stderr, errtag,
   1009                             UnicodeString(pos, length, "").getTerminatedBuffer(),
   1010                             str.getTerminatedBuffer(),
   1011                            u_wmsg_errorName(err));
   1012                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
   1013 
   1014                     willexit = TRUE;
   1015                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
   1016                 }
   1017 
   1018                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
   1019                 // looping until they are; message key "premEnd" now obsolete.
   1020 
   1021                 // Finally, write the converted buffer to the output file
   1022                 size_t outlen = (size_t) (bufp - outbuf);
   1023                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
   1024                 if (wr != outlen) {
   1025                     UnicodeString str(strerror(errno));
   1026                     initMsg(pname);
   1027                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
   1028                     willexit = TRUE;
   1029                 }
   1030 
   1031                 if (willexit) {
   1032                     goto error_exit;
   1033                 }
   1034             } while (!toSawEndOfUnicode);
   1035         } while (!fromSawEndOfBytes);
   1036     } while (!flush);           // Stop when we have flushed the
   1037                                 // converters (this means that it's
   1038                                 // the end of output)
   1039 
   1040     goto normal_exit;
   1041 
   1042 error_exit:
   1043     ret = FALSE;
   1044 
   1045 normal_exit:
   1046     // Cleanup.
   1047 
   1048     ucnv_close(convfrom);
   1049     ucnv_close(convto);
   1050 
   1051 #if !UCONFIG_NO_TRANSLITERATION
   1052     delete t;
   1053 #endif
   1054 
   1055     if (closeFile) {
   1056         fclose(infile);
   1057     }
   1058 
   1059     return ret;
   1060 }
   1061 
   1062 static void usage(const char *pname, int ecode) {
   1063     const UChar *msg;
   1064     int32_t msgLen;
   1065     UErrorCode err = U_ZERO_ERROR;
   1066     FILE *fp = ecode ? stderr : stdout;
   1067     int res;
   1068 
   1069     initMsg(pname);
   1070     msg =
   1071         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
   1072                             &msgLen, &err);
   1073     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
   1074     UnicodeString mname(msg, msgLen + 1);
   1075 
   1076     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
   1077     if (!ecode) {
   1078         if (!res) {
   1079             fputc('\n', fp);
   1080         }
   1081         if (!u_wmsg(fp, "help")) {
   1082             /* Now dump callbacks and finish. */
   1083 
   1084             int i, count =
   1085                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
   1086             for (i = 0; i < count; ++i) {
   1087                 fprintf(fp, " %s", transcode_callbacks[i].name);
   1088             }
   1089             fputc('\n', fp);
   1090         }
   1091     }
   1092 
   1093     exit(ecode);
   1094 }
   1095 
   1096 extern int
   1097 main(int argc, char **argv)
   1098 {
   1099     FILE *outfile;
   1100     int ret = 0;
   1101 
   1102     size_t bufsz = DEFAULT_BUFSZ;
   1103 
   1104     const char *fromcpage = 0;
   1105     const char *tocpage = 0;
   1106     const char *translit = 0;
   1107     const char *outfilestr = 0;
   1108     UBool fallback = FALSE;
   1109 
   1110     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
   1111     const void *fromuctxt = 0;
   1112     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
   1113     const void *touctxt = 0;
   1114 
   1115     char **iter, **remainArgv, **remainArgvLimit;
   1116     char **end = argv + argc;
   1117 
   1118     const char *pname;
   1119 
   1120     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
   1121     const char *printName = 0;
   1122 
   1123     UBool verbose = FALSE;
   1124     UErrorCode status = U_ZERO_ERROR;
   1125 
   1126     ConvertFile cf;
   1127 
   1128     /* Initialize ICU */
   1129     u_init(&status);
   1130     if (U_FAILURE(status)) {
   1131         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
   1132             argv[0], u_errorName(status));
   1133         exit(1);
   1134     }
   1135 
   1136     // Get and prettify pname.
   1137     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
   1138 #if U_PLATFORM_USES_ONLY_WIN32_API
   1139     if (!pname) {
   1140         pname = uprv_strrchr(*argv, '/');
   1141     }
   1142 #endif
   1143     if (!pname) {
   1144         pname = *argv;
   1145     } else {
   1146         ++pname;
   1147     }
   1148 
   1149     // First, get the arguments from command-line
   1150     // to know the codepages to convert between
   1151 
   1152     remainArgv = remainArgvLimit = argv + 1;
   1153     for (iter = argv + 1; iter != end; iter++) {
   1154         // Check for from charset
   1155         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
   1156             iter++;
   1157             if (iter != end)
   1158                 fromcpage = *iter;
   1159             else
   1160                 usage(pname, 1);
   1161         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
   1162             iter++;
   1163             if (iter != end)
   1164                 tocpage = *iter;
   1165             else
   1166                 usage(pname, 1);
   1167         } else if (strcmp("-x", *iter) == 0) {
   1168             iter++;
   1169             if (iter != end)
   1170                 translit = *iter;
   1171             else
   1172                 usage(pname, 1);
   1173         } else if (!strcmp("--fallback", *iter)) {
   1174             fallback = TRUE;
   1175         } else if (!strcmp("--no-fallback", *iter)) {
   1176             fallback = FALSE;
   1177         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
   1178             iter++;
   1179             if (iter != end) {
   1180                 bufsz = atoi(*iter);
   1181                 if ((int) bufsz <= 0) {
   1182                     initMsg(pname);
   1183                     UnicodeString str(*iter);
   1184                     initMsg(pname);
   1185                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
   1186                     return 3;
   1187                 }
   1188             } else {
   1189                 usage(pname, 1);
   1190             }
   1191         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
   1192             if (printTranslits) {
   1193                 usage(pname, 1);
   1194             }
   1195             printConvs = TRUE;
   1196         } else if (strcmp("--default-code", *iter) == 0) {
   1197             if (printTranslits) {
   1198                 usage(pname, 1);
   1199             }
   1200             printName = ucnv_getDefaultName();
   1201         } else if (strcmp("--list-code", *iter) == 0) {
   1202             if (printTranslits) {
   1203                 usage(pname, 1);
   1204             }
   1205 
   1206             iter++;
   1207             if (iter != end) {
   1208                 UErrorCode e = U_ZERO_ERROR;
   1209                 printName = ucnv_getAlias(*iter, 0, &e);
   1210                 if (U_FAILURE(e) || !printName) {
   1211                     UnicodeString str(*iter);
   1212                     initMsg(pname);
   1213                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
   1214                     return 2;
   1215                 }
   1216             } else
   1217                 usage(pname, 1);
   1218         } else if (strcmp("--canon", *iter) == 0) {
   1219             printCanon = TRUE;
   1220         } else if (strcmp("-L", *iter) == 0
   1221             || !strcmp("--list-transliterators", *iter)) {
   1222             if (printConvs) {
   1223                 usage(pname, 1);
   1224             }
   1225             printTranslits = TRUE;
   1226         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
   1227             || !strcmp("--help", *iter)) {
   1228             usage(pname, 0);
   1229         } else if (!strcmp("-c", *iter)) {
   1230             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
   1231         } else if (!strcmp("--to-callback", *iter)) {
   1232             iter++;
   1233             if (iter != end) {
   1234                 const struct callback_ent *cbe = findCallback(*iter);
   1235                 if (cbe) {
   1236                     fromucallback = cbe->fromu;
   1237                     fromuctxt = cbe->fromuctxt;
   1238                 } else {
   1239                     UnicodeString str(*iter);
   1240                     initMsg(pname);
   1241                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1242                     return 4;
   1243                 }
   1244             } else {
   1245                 usage(pname, 1);
   1246             }
   1247         } else if (!strcmp("--from-callback", *iter)) {
   1248             iter++;
   1249             if (iter != end) {
   1250                 const struct callback_ent *cbe = findCallback(*iter);
   1251                 if (cbe) {
   1252                     toucallback = cbe->tou;
   1253                     touctxt = cbe->touctxt;
   1254                 } else {
   1255                     UnicodeString str(*iter);
   1256                     initMsg(pname);
   1257                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1258                     return 4;
   1259                 }
   1260             } else {
   1261                 usage(pname, 1);
   1262             }
   1263         } else if (!strcmp("-i", *iter)) {
   1264             toucallback = UCNV_TO_U_CALLBACK_SKIP;
   1265         } else if (!strcmp("--callback", *iter)) {
   1266             iter++;
   1267             if (iter != end) {
   1268                 const struct callback_ent *cbe = findCallback(*iter);
   1269                 if (cbe) {
   1270                     fromucallback = cbe->fromu;
   1271                     fromuctxt = cbe->fromuctxt;
   1272                     toucallback = cbe->tou;
   1273                     touctxt = cbe->touctxt;
   1274                 } else {
   1275                     UnicodeString str(*iter);
   1276                     initMsg(pname);
   1277                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1278                     return 4;
   1279                 }
   1280             } else {
   1281                 usage(pname, 1);
   1282             }
   1283         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
   1284             verbose = FALSE;
   1285         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
   1286             verbose = TRUE;
   1287         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
   1288             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
   1289             return 0;
   1290         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
   1291             ++iter;
   1292             if (iter != end && !outfilestr) {
   1293                 outfilestr = *iter;
   1294             } else {
   1295                 usage(pname, 1);
   1296             }
   1297         } else if (0 == strcmp("--add-signature", *iter)) {
   1298             cf.signature = 1;
   1299         } else if (0 == strcmp("--remove-signature", *iter)) {
   1300             cf.signature = -1;
   1301         } else if (**iter == '-' && (*iter)[1]) {
   1302             usage(pname, 1);
   1303         } else {
   1304             // move a non-option up in argv[]
   1305             *remainArgvLimit++ = *iter;
   1306         }
   1307     }
   1308 
   1309     if (printConvs || printName) {
   1310         return printConverters(pname, printName, printCanon) ? 2 : 0;
   1311     } else if (printTranslits) {
   1312         return printTransliterators(printCanon) ? 3 : 0;
   1313     }
   1314 
   1315     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
   1316         fromcpage = ucnv_getDefaultName();
   1317     }
   1318     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
   1319         tocpage = ucnv_getDefaultName();
   1320     }
   1321 
   1322     // Open the correct output file or connect to stdout for reading input
   1323     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
   1324         outfile = fopen(outfilestr, "wb");
   1325         if (outfile == 0) {
   1326             UnicodeString str1(outfilestr, "");
   1327             UnicodeString str2(strerror(errno), "");
   1328             initMsg(pname);
   1329             u_wmsg(stderr, "cantCreateOutputF",
   1330                 str1.getBuffer(), str2.getBuffer());
   1331             return 1;
   1332         }
   1333     } else {
   1334         outfilestr = "-";
   1335         outfile = stdout;
   1336 #ifdef USE_FILENO_BINARY_MODE
   1337         if (setmode(fileno(outfile), O_BINARY) == -1) {
   1338             u_wmsg(stderr, "cantSetOutBinMode");
   1339             exit(-1);
   1340         }
   1341 #endif
   1342     }
   1343 
   1344     /* Loop again on the arguments to find all the input files, and
   1345     convert them. */
   1346 
   1347     cf.setBufferSize(bufsz);
   1348 
   1349     if(remainArgv < remainArgvLimit) {
   1350         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
   1351             if (!cf.convertFile(
   1352                     pname, fromcpage, toucallback, touctxt, tocpage,
   1353                     fromucallback, fromuctxt, fallback, translit, *iter,
   1354                     outfile, verbose)
   1355             ) {
   1356                 goto error_exit;
   1357             }
   1358         }
   1359     } else {
   1360         if (!cf.convertFile(
   1361                 pname, fromcpage, toucallback, touctxt, tocpage,
   1362                 fromucallback, fromuctxt, fallback, translit, 0,
   1363                 outfile, verbose)
   1364         ) {
   1365             goto error_exit;
   1366         }
   1367     }
   1368 
   1369     goto normal_exit;
   1370 error_exit:
   1371 #if !UCONFIG_NO_LEGACY_CONVERSION
   1372     ret = 1;
   1373 #else
   1374     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
   1375 #endif
   1376 normal_exit:
   1377 
   1378     if (outfile != stdout) {
   1379         fclose(outfile);
   1380     }
   1381 
   1382     u_cleanup();
   1383 
   1384     return ret;
   1385 }
   1386 
   1387 
   1388 /*
   1389  * Hey, Emacs, please set the following:
   1390  *
   1391  * Local Variables:
   1392  * indent-tabs-mode: nil
   1393  * End:
   1394  *
   1395  */
   1396