Home | History | Annotate | Download | only in uconv
      1 /*****************************************************************************
      2 *
      3 *   Copyright (C) 1999-2009, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *
      6 ******************************************************************************/
      7 
      8 /*
      9  * uconv(1): an iconv(1)-like converter using ICU.
     10  *
     11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom (at) vittran.norrnod.se>
     12  * contributed in 1999.
     13  *
     14  * Conversion to the C conversion API and many improvements by
     15  * Yves Arrouye <yves (at) realnames.com>, current maintainer.
     16  *
     17  * Markus Scherer maintainer from 2003.
     18  * See source code repository history for changes.
     19  */
     20 
     21 #include <unicode/utypes.h>
     22 #include <unicode/putil.h>
     23 #include <unicode/ucnv.h>
     24 #include <unicode/uenum.h>
     25 #include <unicode/unistr.h>
     26 #include <unicode/translit.h>
     27 #include <unicode/uset.h>
     28 #include <unicode/uclean.h>
     29 
     30 #include <stdio.h>
     31 #include <errno.h>
     32 #include <string.h>
     33 #include <stdlib.h>
     34 
     35 #include "cmemory.h"
     36 #include "cstring.h"
     37 #include "ustrfmt.h"
     38 
     39 #include "unicode/uwmsg.h"
     40 
     41 U_NAMESPACE_USE
     42 
     43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
     44 #include <io.h>
     45 #include <fcntl.h>
     46 #if defined(U_WINDOWS)
     47 #define USE_FILENO_BINARY_MODE 1
     48 /* Windows likes to rename Unix-like functions */
     49 #ifndef fileno
     50 #define fileno _fileno
     51 #endif
     52 #ifndef setmode
     53 #define setmode _setmode
     54 #endif
     55 #ifndef O_BINARY
     56 #define O_BINARY _O_BINARY
     57 #endif
     58 #endif
     59 #endif
     60 
     61 #ifdef UCONVMSG_LINK
     62 /* below from the README */
     63 #include "unicode/utypes.h"
     64 #include "unicode/udata.h"
     65 U_CFUNC char uconvmsg_dat[];
     66 #endif
     67 
     68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     69 
     70 #define DEFAULT_BUFSZ   4096
     71 #define UCONVMSG "uconvmsg"
     72 
     73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
     74 
     75 /*
     76  * Initialize the message bundle so that message strings can be fetched
     77  * by u_wmsg().
     78  *
     79  */
     80 
     81 static void initMsg(const char *pname) {
     82     static int ps = 0;
     83 
     84     if (!ps) {
     85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
     86         UErrorCode err = U_ZERO_ERROR;
     87 
     88         ps = 1;
     89 
     90         /* Set up our static data - if any */
     91 #ifdef UCONVMSG_LINK
     92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
     93         if (U_FAILURE(err)) {
     94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
     95                   pname, u_errorName(err));
     96           err = U_ZERO_ERROR; /* It may still fail */
     97         }
     98 #endif
     99 
    100         /* Get messages. */
    101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
    102         if (U_FAILURE(err)) {
    103             fprintf(stderr,
    104                     "%s: warning: couldn't open bundle %s: %s\n",
    105                     pname, UCONVMSG, u_errorName(err));
    106 #ifdef UCONVMSG_LINK
    107             fprintf(stderr,
    108                     "%s: setAppData was called, internal data %s failed to load\n",
    109                         pname, UCONVMSG);
    110 #endif
    111 
    112             err = U_ZERO_ERROR;
    113             /* that was try #1, try again with a path */
    114             uprv_strcpy(dataPath, u_getDataDirectory());
    115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
    116             uprv_strcat(dataPath, UCONVMSG);
    117 
    118             gBundle = u_wmsg_setPath(dataPath, &err);
    119             if (U_FAILURE(err)) {
    120                 fprintf(stderr,
    121                     "%s: warning: still couldn't open bundle %s: %s\n",
    122                     pname, dataPath, u_errorName(err));
    123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
    124             }
    125         }
    126     }
    127 }
    128 
    129 /* Mapping of callback names to the callbacks passed to the converter
    130    API. */
    131 
    132 static struct callback_ent {
    133     const char *name;
    134     UConverterFromUCallback fromu;
    135     const void *fromuctxt;
    136     UConverterToUCallback tou;
    137     const void *touctxt;
    138 } transcode_callbacks[] = {
    139     { "substitute",
    140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
    141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
    142     { "skip",
    143       UCNV_FROM_U_CALLBACK_SKIP, 0,
    144       UCNV_TO_U_CALLBACK_SKIP, 0 },
    145     { "stop",
    146       UCNV_FROM_U_CALLBACK_STOP, 0,
    147       UCNV_TO_U_CALLBACK_STOP, 0 },
    148     { "escape",
    149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
    150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
    151     { "escape-icu",
    152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
    153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
    154     { "escape-java",
    155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
    156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
    157     { "escape-c",
    158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
    159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
    160     { "escape-xml",
    161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    163     { "escape-xml-hex",
    164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    166     { "escape-xml-dec",
    167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
    168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
    169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
    170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
    171 };
    172 
    173 /* Return a pointer to a callback record given its name. */
    174 
    175 static const struct callback_ent *findCallback(const char *name) {
    176     int i, count =
    177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
    178 
    179     /* We'll do a linear search, there aren't many of them and bsearch()
    180        may not be that portable. */
    181 
    182     for (i = 0; i < count; ++i) {
    183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
    184             return &transcode_callbacks[i];
    185         }
    186     }
    187 
    188     return 0;
    189 }
    190 
    191 /* Print converter information. If lookfor is set, only that converter will
    192    be printed, otherwise all converters will be printed. If canon is non
    193    zero, tags and aliases for each converter are printed too, in the format
    194    expected for convrters.txt(5). */
    195 
    196 static int printConverters(const char *pname, const char *lookfor,
    197     UBool canon)
    198 {
    199     UErrorCode err = U_ZERO_ERROR;
    200     int32_t num;
    201     uint16_t num_stds;
    202     const char **stds;
    203 
    204     /* If there is a specified name, just handle that now. */
    205 
    206     if (lookfor) {
    207         if (!canon) {
    208             printf("%s\n", lookfor);
    209             return 0;
    210         } else {
    211         /*  Because we are printing a canonical name, we need the
    212             true converter name. We've done that already except for
    213             the default name (because we want to print the exact
    214             name one would get when calling ucnv_getDefaultName()
    215             in non-canon mode). But since we do not know at this
    216             point if we have the default name or something else, we
    217             need to normalize again to the canonical converter
    218             name. */
    219 
    220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
    221             if (U_SUCCESS(err)) {
    222                 lookfor = truename;
    223             } else {
    224                 err = U_ZERO_ERROR;
    225             }
    226         }
    227     }
    228 
    229     /* Print converter names. We come here for one of two reasons: we
    230        are printing all the names (lookfor was null), or we have a
    231        single converter to print but in canon mode, hence we need to
    232        get to it in order to print everything. */
    233 
    234     num = ucnv_countAvailable();
    235     if (num <= 0) {
    236         initMsg(pname);
    237         u_wmsg(stderr, "cantGetNames");
    238         return -1;
    239     }
    240     if (lookfor) {
    241         num = 1;                /* We know where we want to be. */
    242     }
    243 
    244     num_stds = ucnv_countStandards();
    245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    246     if (!stds) {
    247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
    248         return -1;
    249     } else {
    250         uint16_t s;
    251 
    252         if (canon) {
    253             printf("{ ");
    254         }
    255         for (s = 0; s < num_stds; ++s) {
    256             stds[s] = ucnv_getStandard(s, &err);
    257             if (canon) {
    258                 printf("%s ", stds[s]);
    259             }
    260             if (U_FAILURE(err)) {
    261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
    262                 goto error_cleanup;
    263             }
    264         }
    265         if (canon) {
    266             puts("}");
    267         }
    268     }
    269 
    270     for (int32_t i = 0; i < num; i++) {
    271         const char *name;
    272         uint16_t num_aliases;
    273 
    274         /* Set the name either to what we are looking for, or
    275         to the current converter name. */
    276 
    277         if (lookfor) {
    278             name = lookfor;
    279         } else {
    280             name = ucnv_getAvailableName(i);
    281         }
    282 
    283         /* Get all the aliases associated to the name. */
    284 
    285         err = U_ZERO_ERROR;
    286         num_aliases = ucnv_countAliases(name, &err);
    287         if (U_FAILURE(err)) {
    288             printf("%s", name);
    289 
    290             UnicodeString str(name, "");
    291             putchar('\t');
    292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    293                 u_wmsg_errorName(err));
    294             goto error_cleanup;
    295         } else {
    296             uint16_t a, s, t;
    297 
    298             /* Write all the aliases and their tags. */
    299 
    300             for (a = 0; a < num_aliases; ++a) {
    301                 const char *alias = ucnv_getAlias(name, a, &err);
    302 
    303                 if (U_FAILURE(err)) {
    304                     UnicodeString str(name, "");
    305                     putchar('\t');
    306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    307                         u_wmsg_errorName(err));
    308                     goto error_cleanup;
    309                 }
    310 
    311                 /* Print the current alias so that it looks right. */
    312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
    313                                  alias,
    314                                  (canon ? "" : " "));
    315 
    316                 /* Look (slowly, linear searching) for a tag. */
    317 
    318                 if (canon) {
    319                     /* -1 to skip the last standard */
    320                     for (s = t = 0; s < num_stds-1; ++s) {
    321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
    322                         if (U_SUCCESS(err)) {
    323                             /* List the standard tags */
    324                             const char *standardName;
    325                             UBool isFirst = TRUE;
    326                             UErrorCode enumError = U_ZERO_ERROR;
    327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
    328                                 /* See if this alias is supported by this standard. */
    329                                 if (!strcmp(standardName, alias)) {
    330                                     if (!t) {
    331                                         printf(" {");
    332                                         t = 1;
    333                                     }
    334                                     /* Print a * after the default standard name */
    335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
    336                                 }
    337                                 isFirst = FALSE;
    338                             }
    339                         }
    340                     }
    341                     if (t) {
    342                         printf(" }");
    343                     }
    344                 }
    345                 /* Terminate this entry. */
    346                 if (canon) {
    347                     puts("");
    348                 }
    349 
    350                 /* Move on. */
    351             }
    352             /* Terminate this entry. */
    353             if (!canon) {
    354                 puts("");
    355             }
    356         }
    357     }
    358 
    359     /* Free temporary data. */
    360 
    361     uprv_free(stds);
    362 
    363     /* Success. */
    364 
    365     return 0;
    366 error_cleanup:
    367     uprv_free(stds);
    368     return -1;
    369 }
    370 
    371 /* Print all available transliterators. If canon is non zero, print
    372    one transliterator per line. */
    373 
    374 static int printTransliterators(UBool canon)
    375 {
    376 #if UCONFIG_NO_TRANSLITERATION
    377     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    378     return 1;
    379 #else
    380     int32_t numtrans = utrans_countAvailableIDs(), i;
    381     int buflen = 512;
    382     char *buf = (char *) uprv_malloc(buflen);
    383     char staticbuf[512];
    384 
    385     char sepchar = canon ? '\n' : ' ';
    386 
    387     if (!buf) {
    388         buf = staticbuf;
    389         buflen = sizeof(staticbuf);
    390     }
    391 
    392     for (i = 0; i < numtrans; ++i) {
    393         int32_t len = utrans_getAvailableID(i, buf, buflen);
    394         if (len >= buflen - 1) {
    395             if (buf != staticbuf) {
    396                 buflen <<= 1;
    397                 if (buflen < len) {
    398                     buflen = len + 64;
    399                 }
    400                 buf = (char *) uprv_realloc(buf, buflen);
    401                 if (!buf) {
    402                     buf = staticbuf;
    403                     buflen = sizeof(staticbuf);
    404                 }
    405             }
    406             utrans_getAvailableID(i, buf, buflen);
    407             if (len >= buflen) {
    408                 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
    409             }
    410         }
    411 
    412         printf("%s", buf);
    413         if (i < numtrans - 1) {
    414             putchar(sepchar);
    415         }
    416     }
    417 
    418     /* Add a terminating newline if needed. */
    419 
    420     if (sepchar != '\n') {
    421         putchar('\n');
    422     }
    423 
    424     /* Free temporary data. */
    425 
    426     if (buf != staticbuf) {
    427         uprv_free(buf);
    428     }
    429 
    430     /* Success. */
    431 
    432     return 0;
    433 #endif
    434 }
    435 
    436 enum {
    437     uSP = 0x20,         // space
    438     uCR = 0xd,          // carriage return
    439     uLF = 0xa,          // line feed
    440     uNL = 0x85,         // newline
    441     uLS = 0x2028,       // line separator
    442     uPS = 0x2029,       // paragraph separator
    443     uSig = 0xfeff       // signature/BOM character
    444 };
    445 
    446 static inline int32_t
    447 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
    448     // find one of
    449     // CR, LF, CRLF, NL, LS, PS
    450     // for paragraph ends (see UAX #13/Unicode 4)
    451     // and include it in the chunk
    452     // all of these characters are on the BMP
    453     // do not include FF or VT in case they are part of a paragraph
    454     // (important for bidi contexts)
    455     static const UChar paraEnds[] = {
    456         0xd, 0xa, 0x85, 0x2028, 0x2029
    457     };
    458     enum {
    459         iCR, iLF, iNL, iLS, iPS, iCount
    460     };
    461 
    462     // first, see if there is a CRLF split between prev and s
    463     if (prev.endsWith(paraEnds + iCR, 1)) {
    464         if (s.startsWith(paraEnds + iLF, 1)) {
    465             return 1; // split CRLF, include the LF
    466         } else if (!s.isEmpty()) {
    467             return 0; // complete the last chunk
    468         } else {
    469             return -1; // wait for actual further contents to arrive
    470         }
    471     }
    472 
    473     const UChar *u = s.getBuffer(), *limit = u + s.length();
    474     UChar c;
    475 
    476     while (u < limit) {
    477         c = *u++;
    478         if (
    479             ((c < uSP) && (c == uCR || c == uLF)) ||
    480             (c == uNL) ||
    481             ((c & uLS) == uLS)
    482         ) {
    483             if (c == uCR) {
    484                 // check for CRLF
    485                 if (u == limit) {
    486                     return -1; // LF may be in the next chunk
    487                 } else if (*u == uLF) {
    488                     ++u; // include the LF in this chunk
    489                 }
    490             }
    491             return (int32_t)(u - s.getBuffer());
    492         }
    493     }
    494 
    495     return -1; // continue collecting the chunk
    496 }
    497 
    498 enum {
    499     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
    500     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
    501     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
    502 };
    503 
    504 static inline UChar
    505 nibbleToHex(uint8_t n) {
    506     n &= 0xf;
    507     return
    508         n <= 9 ?
    509             (UChar)(0x30 + n) :
    510             (UChar)((0x61 - 10) + n);
    511 }
    512 
    513 // check the converter's Unicode signature properties;
    514 // the fromUnicode side of the converter must be in its initial state
    515 // and will be reset again if it was used
    516 static int32_t
    517 cnvSigType(UConverter *cnv) {
    518     UErrorCode err;
    519     int32_t result;
    520 
    521     // test if the output charset can convert U+FEFF
    522     USet *set = uset_open(1, 0);
    523     err = U_ZERO_ERROR;
    524     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
    525     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
    526         result = CNV_WITH_FEFF;
    527     } else {
    528         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
    529     }
    530     uset_close(set);
    531 
    532     if (result == CNV_WITH_FEFF) {
    533         // test if the output charset emits a signature anyway
    534         const UChar a[1] = { 0x61 }; // "a"
    535         const UChar *in;
    536 
    537         char buffer[20];
    538         char *out;
    539 
    540         in = a;
    541         out = buffer;
    542         err = U_ZERO_ERROR;
    543         ucnv_fromUnicode(cnv,
    544             &out, buffer + sizeof(buffer),
    545             &in, a + 1,
    546             NULL, TRUE, &err);
    547         ucnv_resetFromUnicode(cnv);
    548 
    549         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
    550             U_SUCCESS(err)
    551         ) {
    552             result = CNV_ADDS_FEFF;
    553         }
    554     }
    555 
    556     return result;
    557 }
    558 
    559 class ConvertFile {
    560 public:
    561     ConvertFile() :
    562         buf(NULL), outbuf(NULL), fromoffsets(NULL),
    563         bufsz(0), signature(0) {}
    564 
    565     void
    566     setBufferSize(size_t bufferSize) {
    567         bufsz = bufferSize;
    568 
    569         buf = new char[2 * bufsz];
    570         outbuf = buf + bufsz;
    571 
    572         // +1 for an added U+FEFF in the intermediate Unicode buffer
    573         fromoffsets = new int32_t[bufsz + 1];
    574     }
    575 
    576     ~ConvertFile() {
    577         delete [] buf;
    578         delete [] fromoffsets;
    579     }
    580 
    581     UBool convertFile(const char *pname,
    582                       const char *fromcpage,
    583                       UConverterToUCallback toucallback,
    584                       const void *touctxt,
    585                       const char *tocpage,
    586                       UConverterFromUCallback fromucallback,
    587                       const void *fromuctxt,
    588                       UBool fallback,
    589                       const char *translit,
    590                       const char *infilestr,
    591                       FILE * outfile, int verbose);
    592 private:
    593     friend int main(int argc, char **argv);
    594 
    595     char *buf, *outbuf;
    596     int32_t *fromoffsets;
    597 
    598     size_t bufsz;
    599     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
    600 };
    601 
    602 // Convert a file from one encoding to another
    603 UBool
    604 ConvertFile::convertFile(const char *pname,
    605                          const char *fromcpage,
    606                          UConverterToUCallback toucallback,
    607                          const void *touctxt,
    608                          const char *tocpage,
    609                          UConverterFromUCallback fromucallback,
    610                          const void *fromuctxt,
    611                          UBool fallback,
    612                          const char *translit,
    613                          const char *infilestr,
    614                          FILE * outfile, int verbose)
    615 {
    616     FILE *infile;
    617     UBool ret = TRUE;
    618     UConverter *convfrom = 0;
    619     UConverter *convto = 0;
    620     UErrorCode err = U_ZERO_ERROR;
    621     UBool flush;
    622     const char *cbufp, *prevbufp;
    623     char *bufp;
    624 
    625     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
    626 
    627     const UChar *unibuf, *unibufbp;
    628     UChar *unibufp;
    629 
    630     size_t rd, wr;
    631 
    632 #if !UCONFIG_NO_TRANSLITERATION
    633     Transliterator *t = 0;      // Transliterator acting on Unicode data.
    634     UnicodeString chunk;        // One chunk of the text being collected for transformation.
    635 #endif
    636     UnicodeString u;            // String to do the transliteration.
    637     int32_t ulen;
    638 
    639     // use conversion offsets for error messages
    640     // unless a transliterator is used -
    641     // a text transformation will reorder characters in unpredictable ways
    642     UBool useOffsets = TRUE;
    643 
    644     // Open the correct input file or connect to stdin for reading input
    645 
    646     if (infilestr != 0 && strcmp(infilestr, "-")) {
    647         infile = fopen(infilestr, "rb");
    648         if (infile == 0) {
    649             UnicodeString str1(infilestr, "");
    650             str1.append((UChar32) 0);
    651             UnicodeString str2(strerror(errno), "");
    652             str2.append((UChar32) 0);
    653             initMsg(pname);
    654             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
    655             return FALSE;
    656         }
    657     } else {
    658         infilestr = "-";
    659         infile = stdin;
    660 #ifdef USE_FILENO_BINARY_MODE
    661         if (setmode(fileno(stdin), O_BINARY) == -1) {
    662             initMsg(pname);
    663             u_wmsg(stderr, "cantSetInBinMode");
    664             return FALSE;
    665         }
    666 #endif
    667     }
    668 
    669     if (verbose) {
    670         fprintf(stderr, "%s:\n", infilestr);
    671     }
    672 
    673 #if !UCONFIG_NO_TRANSLITERATION
    674     // Create transliterator as needed.
    675 
    676     if (translit != NULL && *translit) {
    677         UParseError parse;
    678         UnicodeString str(translit), pestr;
    679 
    680         /* Create from rules or by ID as needed. */
    681 
    682         parse.line = -1;
    683 
    684         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
    685             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
    686         } else {
    687             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
    688         }
    689 
    690         if (U_FAILURE(err)) {
    691             str.append((UChar32) 0);
    692             initMsg(pname);
    693 
    694             if (parse.line >= 0) {
    695                 UChar linebuf[20], offsetbuf[20];
    696                 uprv_itou(linebuf, 20, parse.line, 10, 0);
    697                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
    698                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
    699                     u_wmsg_errorName(err), linebuf, offsetbuf);
    700             } else {
    701                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
    702                     u_wmsg_errorName(err));
    703             }
    704 
    705             if (t) {
    706                 delete t;
    707                 t = 0;
    708             }
    709             goto error_exit;
    710         }
    711 
    712         useOffsets = FALSE;
    713     }
    714 #endif
    715 
    716     // Create codepage converter. If the codepage or its aliases weren't
    717     // available, it returns NULL and a failure code. We also set the
    718     // callbacks, and return errors in the same way.
    719 
    720     convfrom = ucnv_open(fromcpage, &err);
    721     if (U_FAILURE(err)) {
    722         UnicodeString str(fromcpage, "");
    723         initMsg(pname);
    724         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
    725             u_wmsg_errorName(err));
    726         goto error_exit;
    727     }
    728     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    729     if (U_FAILURE(err)) {
    730         initMsg(pname);
    731         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    732         goto error_exit;
    733     }
    734 
    735     convto = ucnv_open(tocpage, &err);
    736     if (U_FAILURE(err)) {
    737         UnicodeString str(tocpage, "");
    738         initMsg(pname);
    739         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
    740             u_wmsg_errorName(err));
    741         goto error_exit;
    742     }
    743     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    744     if (U_FAILURE(err)) {
    745         initMsg(pname);
    746         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    747         goto error_exit;
    748     }
    749     ucnv_setFallback(convto, fallback);
    750 
    751     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
    752     int8_t sig;
    753 
    754     // OK, we can convert now.
    755     sig = signature;
    756     rd = 0;
    757 
    758     do {
    759         willexit = FALSE;
    760 
    761         // input file offset at the beginning of the next buffer
    762         infoffset += rd;
    763 
    764         rd = fread(buf, 1, bufsz, infile);
    765         if (ferror(infile) != 0) {
    766             UnicodeString str(strerror(errno));
    767             initMsg(pname);
    768             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
    769             goto error_exit;
    770         }
    771 
    772         // Convert the read buffer into the new encoding via Unicode.
    773         // After the call 'unibufp' will be placed behind the last
    774         // character that was converted in the 'unibuf'.
    775         // Also the 'cbufp' is positioned behind the last converted
    776         // character.
    777         // At the last conversion in the file, flush should be set to
    778         // true so that we get all characters converted.
    779         //
    780         // The converter must be flushed at the end of conversion so
    781         // that characters on hold also will be written.
    782 
    783         cbufp = buf;
    784         flush = (UBool)(rd != bufsz);
    785 
    786         // convert until the input is consumed
    787         do {
    788             // remember the start of the current byte-to-Unicode conversion
    789             prevbufp = cbufp;
    790 
    791             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
    792 
    793             // Use bufsz instead of u.getCapacity() for the targetLimit
    794             // so that we don't overflow fromoffsets[].
    795             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
    796                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
    797 
    798             ulen = (int32_t)(unibufp - unibuf);
    799             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
    800 
    801             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
    802             // converting all of the input bytes.
    803             // It works like this because ucnv_toUnicode() returns only under the
    804             // following conditions:
    805             // - an error occurred during conversion (an error code is set)
    806             // - the target buffer is filled (the error code indicates an overflow)
    807             // - the source is consumed
    808             // That is, if the error code does not indicate a failure,
    809             // not even an overflow, then the source must be consumed entirely.
    810             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
    811 
    812             if (err == U_BUFFER_OVERFLOW_ERROR) {
    813                 err = U_ZERO_ERROR;
    814             } else if (U_FAILURE(err)) {
    815                 char pos[32], errorBytes[32];
    816                 int8_t i, length, errorLength;
    817 
    818                 UErrorCode localError = U_ZERO_ERROR;
    819                 errorLength = (int8_t)sizeof(errorBytes);
    820                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
    821                 if (U_FAILURE(localError) || errorLength == 0) {
    822                     errorLength = 1;
    823                 }
    824 
    825                 // print the input file offset of the start of the error bytes:
    826                 // input file offset of the current byte buffer +
    827                 // length of the just consumed bytes -
    828                 // length of the error bytes
    829                 length =
    830                     (int8_t)sprintf(pos, "%d",
    831                         (int)(infoffset + (cbufp - buf) - errorLength));
    832 
    833                 // output the bytes that caused the error
    834                 UnicodeString str;
    835                 for (i = 0; i < errorLength; ++i) {
    836                     if (i > 0) {
    837                         str.append((UChar)uSP);
    838                     }
    839                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
    840                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
    841                 }
    842 
    843                 initMsg(pname);
    844                 u_wmsg(stderr, "problemCvtToU",
    845                         UnicodeString(pos, length, "").getTerminatedBuffer(),
    846                         str.getTerminatedBuffer(),
    847                         u_wmsg_errorName(err));
    848 
    849                 willexit = TRUE;
    850                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
    851             }
    852 
    853             // Replaced a check for whether the input was consumed by
    854             // looping until it is; message key "premEndInput" now obsolete.
    855 
    856             if (ulen == 0) {
    857                 continue;
    858             }
    859 
    860             // remove a U+FEFF Unicode signature character if requested
    861             if (sig < 0) {
    862                 if (u.charAt(0) == uSig) {
    863                     u.remove(0, 1);
    864 
    865                     // account for the removed UChar and offset
    866                     --ulen;
    867 
    868                     if (useOffsets) {
    869                         // remove an offset from fromoffsets[] as well
    870                         // to keep the array parallel with the UChars
    871                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
    872                     }
    873 
    874                 }
    875                 sig = 0;
    876             }
    877 
    878 #if !UCONFIG_NO_TRANSLITERATION
    879             // Transliterate/transform if needed.
    880 
    881             // For transformation, we use chunking code -
    882             // collect Unicode input until, for example, an end-of-line,
    883             // then transform and output-convert that and continue collecting.
    884             // This makes the transformation result independent of the buffer size
    885             // while avoiding the slower keyboard mode.
    886             // The end-of-chunk characters are completely included in the
    887             // transformed string in case they are to be transformed themselves.
    888             if (t != NULL) {
    889                 UnicodeString out;
    890                 int32_t chunkLimit;
    891 
    892                 do {
    893                     chunkLimit = getChunkLimit(chunk, u);
    894                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
    895                         // use all of the rest at the end of the text
    896                         chunkLimit = u.length();
    897                     }
    898                     if (chunkLimit >= 0) {
    899                         // complete the chunk and transform it
    900                         chunk.append(u, 0, chunkLimit);
    901                         u.remove(0, chunkLimit);
    902                         t->transliterate(chunk);
    903 
    904                         // append the transformation result to the result and empty the chunk
    905                         out.append(chunk);
    906                         chunk.remove();
    907                     } else {
    908                         // continue collecting the chunk
    909                         chunk.append(u);
    910                         break;
    911                     }
    912                 } while (!u.isEmpty());
    913 
    914                 u = out;
    915                 ulen = u.length();
    916             }
    917 #endif
    918 
    919             // add a U+FEFF Unicode signature character if requested
    920             // and possible/necessary
    921             if (sig > 0) {
    922                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
    923                     u.insert(0, (UChar)uSig);
    924 
    925                     if (useOffsets) {
    926                         // insert a pseudo-offset into fromoffsets[] as well
    927                         // to keep the array parallel with the UChars
    928                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
    929                         fromoffsets[0] = -1;
    930                     }
    931 
    932                     // account for the additional UChar and offset
    933                     ++ulen;
    934                 }
    935                 sig = 0;
    936             }
    937 
    938             // Convert the Unicode buffer into the destination codepage
    939             // Again 'bufp' will be placed behind the last converted character
    940             // And 'unibufp' will be placed behind the last converted unicode character
    941             // At the last conversion flush should be set to true to ensure that
    942             // all characters left get converted
    943 
    944             unibuf = unibufbp = u.getBuffer();
    945 
    946             do {
    947                 bufp = outbuf;
    948 
    949                 // Use fromSawEndOfBytes in addition to the flush flag -
    950                 // it indicates whether the intermediate Unicode string
    951                 // contains the very last UChars for the very last input bytes.
    952                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
    953                                  &unibufbp,
    954                                  unibuf + ulen,
    955                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
    956 
    957                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
    958                 // converting all of the intermediate UChars.
    959                 // See comment for fromSawEndOfBytes.
    960                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
    961 
    962                 if (err == U_BUFFER_OVERFLOW_ERROR) {
    963                     err = U_ZERO_ERROR;
    964                 } else if (U_FAILURE(err)) {
    965                     UChar errorUChars[4];
    966                     const char *errtag;
    967                     char pos[32];
    968                     UChar32 c;
    969                     int8_t i, length, errorLength;
    970 
    971                     UErrorCode localError = U_ZERO_ERROR;
    972                     errorLength = (int8_t)LENGTHOF(errorUChars);
    973                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
    974                     if (U_FAILURE(localError) || errorLength == 0) {
    975                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
    976                         errorLength = 1;
    977                     }
    978 
    979                     int32_t ferroffset;
    980 
    981                     if (useOffsets) {
    982                         // Unicode buffer offset of the start of the error UChars
    983                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
    984                         if (ferroffset < 0) {
    985                             // approximation - the character started in the previous Unicode buffer
    986                             ferroffset = 0;
    987                         }
    988 
    989                         // get the corresponding byte offset out of fromoffsets[]
    990                         // go back if the offset is not known for some of the UChars
    991                         int32_t fromoffset;
    992                         do {
    993                             fromoffset = fromoffsets[ferroffset];
    994                         } while (fromoffset < 0 && --ferroffset >= 0);
    995 
    996                         // total input file offset =
    997                         // input file offset of the current byte buffer +
    998                         // byte buffer offset of where the current Unicode buffer is converted from +
    999                         // fromoffsets[Unicode offset]
   1000                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
   1001                         errtag = "problemCvtFromU";
   1002                     } else {
   1003                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
   1004                         // be different from what the offsets refer to.
   1005 
   1006                         // output file offset
   1007                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
   1008                         errtag = "problemCvtFromUOut";
   1009                     }
   1010 
   1011                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
   1012 
   1013                     // output the code points that caused the error
   1014                     UnicodeString str;
   1015                     for (i = 0; i < errorLength;) {
   1016                         if (i > 0) {
   1017                             str.append((UChar)uSP);
   1018                         }
   1019                         U16_NEXT(errorUChars, i, errorLength, c);
   1020                         if (c >= 0x100000) {
   1021                             str.append(nibbleToHex((uint8_t)(c >> 20)));
   1022                         }
   1023                         if (c >= 0x10000) {
   1024                             str.append(nibbleToHex((uint8_t)(c >> 16)));
   1025                         }
   1026                         str.append(nibbleToHex((uint8_t)(c >> 12)));
   1027                         str.append(nibbleToHex((uint8_t)(c >> 8)));
   1028                         str.append(nibbleToHex((uint8_t)(c >> 4)));
   1029                         str.append(nibbleToHex((uint8_t)c));
   1030                     }
   1031 
   1032                     initMsg(pname);
   1033                     u_wmsg(stderr, errtag,
   1034                             UnicodeString(pos, length, "").getTerminatedBuffer(),
   1035                             str.getTerminatedBuffer(),
   1036                            u_wmsg_errorName(err));
   1037                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
   1038 
   1039                     willexit = TRUE;
   1040                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
   1041                 }
   1042 
   1043                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
   1044                 // looping until they are; message key "premEnd" now obsolete.
   1045 
   1046                 // Finally, write the converted buffer to the output file
   1047                 size_t outlen = (size_t) (bufp - outbuf);
   1048                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
   1049                 if (wr != outlen) {
   1050                     UnicodeString str(strerror(errno));
   1051                     initMsg(pname);
   1052                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
   1053                     willexit = TRUE;
   1054                 }
   1055 
   1056                 if (willexit) {
   1057                     goto error_exit;
   1058                 }
   1059             } while (!toSawEndOfUnicode);
   1060         } while (!fromSawEndOfBytes);
   1061     } while (!flush);           // Stop when we have flushed the
   1062                                 // converters (this means that it's
   1063                                 // the end of output)
   1064 
   1065     goto normal_exit;
   1066 
   1067 error_exit:
   1068     ret = FALSE;
   1069 
   1070 normal_exit:
   1071     // Cleanup.
   1072 
   1073     ucnv_close(convfrom);
   1074     ucnv_close(convto);
   1075 
   1076 #if !UCONFIG_NO_TRANSLITERATION
   1077     delete t;
   1078 #endif
   1079 
   1080     if (infile != stdin) {
   1081         fclose(infile);
   1082     }
   1083 
   1084     return ret;
   1085 }
   1086 
   1087 static void usage(const char *pname, int ecode) {
   1088     const UChar *msg;
   1089     int32_t msgLen;
   1090     UErrorCode err = U_ZERO_ERROR;
   1091     FILE *fp = ecode ? stderr : stdout;
   1092     int res;
   1093 
   1094     initMsg(pname);
   1095     msg =
   1096         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
   1097                             &msgLen, &err);
   1098     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
   1099     UnicodeString mname(msg, msgLen + 1);
   1100 
   1101     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
   1102     if (!ecode) {
   1103         if (!res) {
   1104             fputc('\n', fp);
   1105         }
   1106         if (!u_wmsg(fp, "help")) {
   1107             /* Now dump callbacks and finish. */
   1108 
   1109             int i, count =
   1110                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
   1111             for (i = 0; i < count; ++i) {
   1112                 fprintf(fp, " %s", transcode_callbacks[i].name);
   1113             }
   1114             fputc('\n', fp);
   1115         }
   1116     }
   1117 
   1118     exit(ecode);
   1119 }
   1120 
   1121 extern int
   1122 main(int argc, char **argv)
   1123 {
   1124     FILE *outfile;
   1125     int ret = 0;
   1126 
   1127     size_t bufsz = DEFAULT_BUFSZ;
   1128 
   1129     const char *fromcpage = 0;
   1130     const char *tocpage = 0;
   1131     const char *translit = 0;
   1132     const char *outfilestr = 0;
   1133     UBool fallback = FALSE;
   1134 
   1135     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
   1136     const void *fromuctxt = 0;
   1137     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
   1138     const void *touctxt = 0;
   1139 
   1140     char **iter, **remainArgv, **remainArgvLimit;
   1141     char **end = argv + argc;
   1142 
   1143     const char *pname;
   1144 
   1145     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
   1146     const char *printName = 0;
   1147 
   1148     UBool verbose = FALSE;
   1149     UErrorCode status = U_ZERO_ERROR;
   1150 
   1151     ConvertFile cf;
   1152 
   1153     /* Initialize ICU */
   1154     u_init(&status);
   1155     if (U_FAILURE(status)) {
   1156         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
   1157             argv[0], u_errorName(status));
   1158         exit(1);
   1159     }
   1160 
   1161     // Get and prettify pname.
   1162     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
   1163 #ifdef U_WINDOWS
   1164     if (!pname) {
   1165         pname = uprv_strrchr(*argv, '/');
   1166     }
   1167 #endif
   1168     if (!pname) {
   1169         pname = *argv;
   1170     } else {
   1171         ++pname;
   1172     }
   1173 
   1174     // First, get the arguments from command-line
   1175     // to know the codepages to convert between
   1176 
   1177     remainArgv = remainArgvLimit = argv + 1;
   1178     for (iter = argv + 1; iter != end; iter++) {
   1179         // Check for from charset
   1180         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
   1181             iter++;
   1182             if (iter != end)
   1183                 fromcpage = *iter;
   1184             else
   1185                 usage(pname, 1);
   1186         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
   1187             iter++;
   1188             if (iter != end)
   1189                 tocpage = *iter;
   1190             else
   1191                 usage(pname, 1);
   1192         } else if (strcmp("-x", *iter) == 0) {
   1193             iter++;
   1194             if (iter != end)
   1195                 translit = *iter;
   1196             else
   1197                 usage(pname, 1);
   1198         } else if (!strcmp("--fallback", *iter)) {
   1199             fallback = TRUE;
   1200         } else if (!strcmp("--no-fallback", *iter)) {
   1201             fallback = FALSE;
   1202         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
   1203             iter++;
   1204             if (iter != end) {
   1205                 bufsz = atoi(*iter);
   1206                 if ((int) bufsz <= 0) {
   1207                     initMsg(pname);
   1208                     UnicodeString str(*iter);
   1209                     initMsg(pname);
   1210                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
   1211                     return 3;
   1212                 }
   1213             } else {
   1214                 usage(pname, 1);
   1215             }
   1216         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
   1217             if (printTranslits) {
   1218                 usage(pname, 1);
   1219             }
   1220             printConvs = TRUE;
   1221         } else if (strcmp("--default-code", *iter) == 0) {
   1222             if (printTranslits) {
   1223                 usage(pname, 1);
   1224             }
   1225             printName = ucnv_getDefaultName();
   1226         } else if (strcmp("--list-code", *iter) == 0) {
   1227             if (printTranslits) {
   1228                 usage(pname, 1);
   1229             }
   1230 
   1231             iter++;
   1232             if (iter != end) {
   1233                 UErrorCode e = U_ZERO_ERROR;
   1234                 printName = ucnv_getAlias(*iter, 0, &e);
   1235                 if (U_FAILURE(e) || !printName) {
   1236                     UnicodeString str(*iter);
   1237                     initMsg(pname);
   1238                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
   1239                     return 2;
   1240                 }
   1241             } else
   1242                 usage(pname, 1);
   1243         } else if (strcmp("--canon", *iter) == 0) {
   1244             printCanon = TRUE;
   1245         } else if (strcmp("-L", *iter) == 0
   1246             || !strcmp("--list-transliterators", *iter)) {
   1247             if (printConvs) {
   1248                 usage(pname, 1);
   1249             }
   1250             printTranslits = TRUE;
   1251         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
   1252             || !strcmp("--help", *iter)) {
   1253             usage(pname, 0);
   1254         } else if (!strcmp("-c", *iter)) {
   1255             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
   1256         } else if (!strcmp("--to-callback", *iter)) {
   1257             iter++;
   1258             if (iter != end) {
   1259                 const struct callback_ent *cbe = findCallback(*iter);
   1260                 if (cbe) {
   1261                     fromucallback = cbe->fromu;
   1262                     fromuctxt = cbe->fromuctxt;
   1263                 } else {
   1264                     UnicodeString str(*iter);
   1265                     initMsg(pname);
   1266                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1267                     return 4;
   1268                 }
   1269             } else {
   1270                 usage(pname, 1);
   1271             }
   1272         } else if (!strcmp("--from-callback", *iter)) {
   1273             iter++;
   1274             if (iter != end) {
   1275                 const struct callback_ent *cbe = findCallback(*iter);
   1276                 if (cbe) {
   1277                     toucallback = cbe->tou;
   1278                     touctxt = cbe->touctxt;
   1279                 } else {
   1280                     UnicodeString str(*iter);
   1281                     initMsg(pname);
   1282                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1283                     return 4;
   1284                 }
   1285             } else {
   1286                 usage(pname, 1);
   1287             }
   1288         } else if (!strcmp("-i", *iter)) {
   1289             toucallback = UCNV_TO_U_CALLBACK_SKIP;
   1290         } else if (!strcmp("--callback", *iter)) {
   1291             iter++;
   1292             if (iter != end) {
   1293                 const struct callback_ent *cbe = findCallback(*iter);
   1294                 if (cbe) {
   1295                     fromucallback = cbe->fromu;
   1296                     fromuctxt = cbe->fromuctxt;
   1297                     toucallback = cbe->tou;
   1298                     touctxt = cbe->touctxt;
   1299                 } else {
   1300                     UnicodeString str(*iter);
   1301                     initMsg(pname);
   1302                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1303                     return 4;
   1304                 }
   1305             } else {
   1306                 usage(pname, 1);
   1307             }
   1308         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
   1309             verbose = FALSE;
   1310         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
   1311             verbose = TRUE;
   1312         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
   1313             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
   1314             return 0;
   1315         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
   1316             ++iter;
   1317             if (iter != end && !outfilestr) {
   1318                 outfilestr = *iter;
   1319             } else {
   1320                 usage(pname, 1);
   1321             }
   1322         } else if (0 == strcmp("--add-signature", *iter)) {
   1323             cf.signature = 1;
   1324         } else if (0 == strcmp("--remove-signature", *iter)) {
   1325             cf.signature = -1;
   1326         } else if (**iter == '-' && (*iter)[1]) {
   1327             usage(pname, 1);
   1328         } else {
   1329             // move a non-option up in argv[]
   1330             *remainArgvLimit++ = *iter;
   1331         }
   1332     }
   1333 
   1334     if (printConvs || printName) {
   1335         return printConverters(pname, printName, printCanon) ? 2 : 0;
   1336     } else if (printTranslits) {
   1337         return printTransliterators(printCanon) ? 3 : 0;
   1338     }
   1339 
   1340     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
   1341         fromcpage = ucnv_getDefaultName();
   1342     }
   1343     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
   1344         tocpage = ucnv_getDefaultName();
   1345     }
   1346 
   1347     // Open the correct output file or connect to stdout for reading input
   1348     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
   1349         outfile = fopen(outfilestr, "wb");
   1350         if (outfile == 0) {
   1351             UnicodeString str1(outfilestr, "");
   1352             UnicodeString str2(strerror(errno), "");
   1353             initMsg(pname);
   1354             u_wmsg(stderr, "cantCreateOutputF",
   1355                 str1.getBuffer(), str2.getBuffer());
   1356             return 1;
   1357         }
   1358     } else {
   1359         outfilestr = "-";
   1360         outfile = stdout;
   1361 #ifdef USE_FILENO_BINARY_MODE
   1362         if (setmode(fileno(outfile), O_BINARY) == -1) {
   1363             u_wmsg(stderr, "cantSetOutBinMode");
   1364             exit(-1);
   1365         }
   1366 #endif
   1367     }
   1368 
   1369     /* Loop again on the arguments to find all the input files, and
   1370     convert them. */
   1371 
   1372     cf.setBufferSize(bufsz);
   1373 
   1374     if(remainArgv < remainArgvLimit) {
   1375         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
   1376             if (!cf.convertFile(
   1377                     pname, fromcpage, toucallback, touctxt, tocpage,
   1378                     fromucallback, fromuctxt, fallback, translit, *iter,
   1379                     outfile, verbose)
   1380             ) {
   1381                 goto error_exit;
   1382             }
   1383         }
   1384     } else {
   1385         if (!cf.convertFile(
   1386                 pname, fromcpage, toucallback, touctxt, tocpage,
   1387                 fromucallback, fromuctxt, fallback, translit, 0,
   1388                 outfile, verbose)
   1389         ) {
   1390             goto error_exit;
   1391         }
   1392     }
   1393 
   1394     goto normal_exit;
   1395 error_exit:
   1396 #if !UCONFIG_NO_LEGACY_CONVERSION
   1397     ret = 1;
   1398 #else
   1399     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
   1400 #endif
   1401 normal_exit:
   1402 
   1403     if (outfile != stdout) {
   1404         fclose(outfile);
   1405     }
   1406 
   1407     return ret;
   1408 }
   1409 
   1410 
   1411 /*
   1412  * Hey, Emacs, please set the following:
   1413  *
   1414  * Local Variables:
   1415  * indent-tabs-mode: nil
   1416  * End:
   1417  *
   1418  */
   1419