Home | History | Annotate | Download | only in uconv
      1 /*****************************************************************************
      2 *
      3 *   Copyright (C) 1999-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 *
      6 ******************************************************************************/
      7 
      8 /*
      9  * uconv(1): an iconv(1)-like converter using ICU.
     10  *
     11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom (at) vittran.norrnod.se>
     12  * contributed in 1999.
     13  *
     14  * Conversion to the C conversion API and many improvements by
     15  * Yves Arrouye <yves (at) realnames.com>, current maintainer.
     16  *
     17  * Markus Scherer maintainer from 2003.
     18  * See source code repository history for changes.
     19  */
     20 
     21 #include <unicode/utypes.h>
     22 #include <unicode/putil.h>
     23 #include <unicode/ucnv.h>
     24 #include <unicode/uenum.h>
     25 #include <unicode/unistr.h>
     26 #include <unicode/translit.h>
     27 #include <unicode/uset.h>
     28 #include <unicode/uclean.h>
     29 
     30 #include <stdio.h>
     31 #include <errno.h>
     32 #include <string.h>
     33 #include <stdlib.h>
     34 
     35 #include "cmemory.h"
     36 #include "cstring.h"
     37 #include "ustrfmt.h"
     38 
     39 #include "unicode/uwmsg.h"
     40 
     41 U_NAMESPACE_USE
     42 
     43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
     44 #include <io.h>
     45 #include <fcntl.h>
     46 #if defined(U_WINDOWS)
     47 #define USE_FILENO_BINARY_MODE 1
     48 /* Windows likes to rename Unix-like functions */
     49 #ifndef fileno
     50 #define fileno _fileno
     51 #endif
     52 #ifndef setmode
     53 #define setmode _setmode
     54 #endif
     55 #ifndef O_BINARY
     56 #define O_BINARY _O_BINARY
     57 #endif
     58 #endif
     59 #endif
     60 
     61 #ifdef UCONVMSG_LINK
     62 /* below from the README */
     63 #include "unicode/utypes.h"
     64 #include "unicode/udata.h"
     65 U_CFUNC char uconvmsg_dat[];
     66 #endif
     67 
     68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     69 
     70 #define DEFAULT_BUFSZ   4096
     71 #define UCONVMSG "uconvmsg"
     72 
     73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
     74 
     75 /*
     76  * Initialize the message bundle so that message strings can be fetched
     77  * by u_wmsg().
     78  *
     79  */
     80 
     81 static void initMsg(const char *pname) {
     82     static int ps = 0;
     83 
     84     if (!ps) {
     85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
     86         UErrorCode err = U_ZERO_ERROR;
     87 
     88         ps = 1;
     89 
     90         /* Set up our static data - if any */
     91 #ifdef UCONVMSG_LINK
     92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
     93         if (U_FAILURE(err)) {
     94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
     95                   pname, u_errorName(err));
     96           err = U_ZERO_ERROR; /* It may still fail */
     97         }
     98 #endif
     99 
    100         /* Get messages. */
    101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
    102         if (U_FAILURE(err)) {
    103             fprintf(stderr,
    104                     "%s: warning: couldn't open bundle %s: %s\n",
    105                     pname, UCONVMSG, u_errorName(err));
    106 #ifdef UCONVMSG_LINK
    107             fprintf(stderr,
    108                     "%s: setAppData was called, internal data %s failed to load\n",
    109                         pname, UCONVMSG);
    110 #endif
    111 
    112             err = U_ZERO_ERROR;
    113             /* that was try #1, try again with a path */
    114             uprv_strcpy(dataPath, u_getDataDirectory());
    115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
    116             uprv_strcat(dataPath, UCONVMSG);
    117 
    118             gBundle = u_wmsg_setPath(dataPath, &err);
    119             if (U_FAILURE(err)) {
    120                 fprintf(stderr,
    121                     "%s: warning: still couldn't open bundle %s: %s\n",
    122                     pname, dataPath, u_errorName(err));
    123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
    124             }
    125         }
    126     }
    127 }
    128 
    129 /* Mapping of callback names to the callbacks passed to the converter
    130    API. */
    131 
    132 static struct callback_ent {
    133     const char *name;
    134     UConverterFromUCallback fromu;
    135     const void *fromuctxt;
    136     UConverterToUCallback tou;
    137     const void *touctxt;
    138 } transcode_callbacks[] = {
    139     { "substitute",
    140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
    141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
    142     { "skip",
    143       UCNV_FROM_U_CALLBACK_SKIP, 0,
    144       UCNV_TO_U_CALLBACK_SKIP, 0 },
    145     { "stop",
    146       UCNV_FROM_U_CALLBACK_STOP, 0,
    147       UCNV_TO_U_CALLBACK_STOP, 0 },
    148     { "escape",
    149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
    150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
    151     { "escape-icu",
    152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
    153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
    154     { "escape-java",
    155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
    156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
    157     { "escape-c",
    158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
    159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
    160     { "escape-xml",
    161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    163     { "escape-xml-hex",
    164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
    165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
    166     { "escape-xml-dec",
    167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
    168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
    169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
    170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
    171 };
    172 
    173 /* Return a pointer to a callback record given its name. */
    174 
    175 static const struct callback_ent *findCallback(const char *name) {
    176     int i, count =
    177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
    178 
    179     /* We'll do a linear search, there aren't many of them and bsearch()
    180        may not be that portable. */
    181 
    182     for (i = 0; i < count; ++i) {
    183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
    184             return &transcode_callbacks[i];
    185         }
    186     }
    187 
    188     return 0;
    189 }
    190 
    191 /* Print converter information. If lookfor is set, only that converter will
    192    be printed, otherwise all converters will be printed. If canon is non
    193    zero, tags and aliases for each converter are printed too, in the format
    194    expected for convrters.txt(5). */
    195 
    196 static int printConverters(const char *pname, const char *lookfor,
    197     UBool canon)
    198 {
    199     UErrorCode err = U_ZERO_ERROR;
    200     int32_t num;
    201     uint16_t num_stds;
    202     const char **stds;
    203 
    204     /* If there is a specified name, just handle that now. */
    205 
    206     if (lookfor) {
    207         if (!canon) {
    208             printf("%s\n", lookfor);
    209             return 0;
    210         } else {
    211         /*  Because we are printing a canonical name, we need the
    212             true converter name. We've done that already except for
    213             the default name (because we want to print the exact
    214             name one would get when calling ucnv_getDefaultName()
    215             in non-canon mode). But since we do not know at this
    216             point if we have the default name or something else, we
    217             need to normalize again to the canonical converter
    218             name. */
    219 
    220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
    221             if (U_SUCCESS(err)) {
    222                 lookfor = truename;
    223             } else {
    224                 err = U_ZERO_ERROR;
    225             }
    226         }
    227     }
    228 
    229     /* Print converter names. We come here for one of two reasons: we
    230        are printing all the names (lookfor was null), or we have a
    231        single converter to print but in canon mode, hence we need to
    232        get to it in order to print everything. */
    233 
    234     num = ucnv_countAvailable();
    235     if (num <= 0) {
    236         initMsg(pname);
    237         u_wmsg(stderr, "cantGetNames");
    238         return -1;
    239     }
    240     if (lookfor) {
    241         num = 1;                /* We know where we want to be. */
    242     }
    243 
    244     num_stds = ucnv_countStandards();
    245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    246     if (!stds) {
    247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
    248         return -1;
    249     } else {
    250         uint16_t s;
    251 
    252         if (canon) {
    253             printf("{ ");
    254         }
    255         for (s = 0; s < num_stds; ++s) {
    256             stds[s] = ucnv_getStandard(s, &err);
    257             if (canon) {
    258                 printf("%s ", stds[s]);
    259             }
    260             if (U_FAILURE(err)) {
    261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
    262                 goto error_cleanup;
    263             }
    264         }
    265         if (canon) {
    266             puts("}");
    267         }
    268     }
    269 
    270     for (int32_t i = 0; i < num; i++) {
    271         const char *name;
    272         uint16_t num_aliases;
    273 
    274         /* Set the name either to what we are looking for, or
    275         to the current converter name. */
    276 
    277         if (lookfor) {
    278             name = lookfor;
    279         } else {
    280             name = ucnv_getAvailableName(i);
    281         }
    282 
    283         /* Get all the aliases associated to the name. */
    284 
    285         err = U_ZERO_ERROR;
    286         num_aliases = ucnv_countAliases(name, &err);
    287         if (U_FAILURE(err)) {
    288             printf("%s", name);
    289 
    290             UnicodeString str(name, "");
    291             putchar('\t');
    292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    293                 u_wmsg_errorName(err));
    294             goto error_cleanup;
    295         } else {
    296             uint16_t a, s, t;
    297 
    298             /* Write all the aliases and their tags. */
    299 
    300             for (a = 0; a < num_aliases; ++a) {
    301                 const char *alias = ucnv_getAlias(name, a, &err);
    302 
    303                 if (U_FAILURE(err)) {
    304                     UnicodeString str(name, "");
    305                     putchar('\t');
    306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
    307                         u_wmsg_errorName(err));
    308                     goto error_cleanup;
    309                 }
    310 
    311                 /* Print the current alias so that it looks right. */
    312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
    313                                  alias,
    314                                  (canon ? "" : " "));
    315 
    316                 /* Look (slowly, linear searching) for a tag. */
    317 
    318                 if (canon) {
    319                     /* -1 to skip the last standard */
    320                     for (s = t = 0; s < num_stds-1; ++s) {
    321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
    322                         if (U_SUCCESS(err)) {
    323                             /* List the standard tags */
    324                             const char *standardName;
    325                             UBool isFirst = TRUE;
    326                             UErrorCode enumError = U_ZERO_ERROR;
    327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
    328                                 /* See if this alias is supported by this standard. */
    329                                 if (!strcmp(standardName, alias)) {
    330                                     if (!t) {
    331                                         printf(" {");
    332                                         t = 1;
    333                                     }
    334                                     /* Print a * after the default standard name */
    335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
    336                                 }
    337                                 isFirst = FALSE;
    338                             }
    339                         }
    340                     }
    341                     if (t) {
    342                         printf(" }");
    343                     }
    344                 }
    345                 /* Terminate this entry. */
    346                 if (canon) {
    347                     puts("");
    348                 }
    349 
    350                 /* Move on. */
    351             }
    352             /* Terminate this entry. */
    353             if (!canon) {
    354                 puts("");
    355             }
    356         }
    357     }
    358 
    359     /* Free temporary data. */
    360 
    361     uprv_free(stds);
    362 
    363     /* Success. */
    364 
    365     return 0;
    366 error_cleanup:
    367     uprv_free(stds);
    368     return -1;
    369 }
    370 
    371 /* Print all available transliterators. If canon is non zero, print
    372    one transliterator per line. */
    373 
    374 static int printTransliterators(UBool canon)
    375 {
    376 #if UCONFIG_NO_TRANSLITERATION
    377     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    378     return 1;
    379 #else
    380     UErrorCode status = U_ZERO_ERROR;
    381     UEnumeration *ids = utrans_openIDs(&status);
    382     int32_t i, numtrans = uenum_count(ids, &status);
    383 
    384     char sepchar = canon ? '\n' : ' ';
    385 
    386     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
    387     	int32_t len;
    388     	const char *nextTrans = uenum_next(ids, &len, &status);
    389 
    390         printf("%s", nextTrans);
    391         if (i < numtrans - 1) {
    392             putchar(sepchar);
    393         }
    394     }
    395 
    396     uenum_close(ids);
    397 
    398     /* Add a terminating newline if needed. */
    399 
    400     if (sepchar != '\n') {
    401         putchar('\n');
    402     }
    403 
    404     /* Success. */
    405 
    406     return 0;
    407 #endif
    408 }
    409 
    410 enum {
    411     uSP = 0x20,         // space
    412     uCR = 0xd,          // carriage return
    413     uLF = 0xa,          // line feed
    414     uNL = 0x85,         // newline
    415     uLS = 0x2028,       // line separator
    416     uPS = 0x2029,       // paragraph separator
    417     uSig = 0xfeff       // signature/BOM character
    418 };
    419 
    420 static inline int32_t
    421 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
    422     // find one of
    423     // CR, LF, CRLF, NL, LS, PS
    424     // for paragraph ends (see UAX #13/Unicode 4)
    425     // and include it in the chunk
    426     // all of these characters are on the BMP
    427     // do not include FF or VT in case they are part of a paragraph
    428     // (important for bidi contexts)
    429     static const UChar paraEnds[] = {
    430         0xd, 0xa, 0x85, 0x2028, 0x2029
    431     };
    432     enum {
    433         iCR, iLF, iNL, iLS, iPS, iCount
    434     };
    435 
    436     // first, see if there is a CRLF split between prev and s
    437     if (prev.endsWith(paraEnds + iCR, 1)) {
    438         if (s.startsWith(paraEnds + iLF, 1)) {
    439             return 1; // split CRLF, include the LF
    440         } else if (!s.isEmpty()) {
    441             return 0; // complete the last chunk
    442         } else {
    443             return -1; // wait for actual further contents to arrive
    444         }
    445     }
    446 
    447     const UChar *u = s.getBuffer(), *limit = u + s.length();
    448     UChar c;
    449 
    450     while (u < limit) {
    451         c = *u++;
    452         if (
    453             ((c < uSP) && (c == uCR || c == uLF)) ||
    454             (c == uNL) ||
    455             ((c & uLS) == uLS)
    456         ) {
    457             if (c == uCR) {
    458                 // check for CRLF
    459                 if (u == limit) {
    460                     return -1; // LF may be in the next chunk
    461                 } else if (*u == uLF) {
    462                     ++u; // include the LF in this chunk
    463                 }
    464             }
    465             return (int32_t)(u - s.getBuffer());
    466         }
    467     }
    468 
    469     return -1; // continue collecting the chunk
    470 }
    471 
    472 enum {
    473     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
    474     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
    475     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
    476 };
    477 
    478 static inline UChar
    479 nibbleToHex(uint8_t n) {
    480     n &= 0xf;
    481     return
    482         n <= 9 ?
    483             (UChar)(0x30 + n) :
    484             (UChar)((0x61 - 10) + n);
    485 }
    486 
    487 // check the converter's Unicode signature properties;
    488 // the fromUnicode side of the converter must be in its initial state
    489 // and will be reset again if it was used
    490 static int32_t
    491 cnvSigType(UConverter *cnv) {
    492     UErrorCode err;
    493     int32_t result;
    494 
    495     // test if the output charset can convert U+FEFF
    496     USet *set = uset_open(1, 0);
    497     err = U_ZERO_ERROR;
    498     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
    499     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
    500         result = CNV_WITH_FEFF;
    501     } else {
    502         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
    503     }
    504     uset_close(set);
    505 
    506     if (result == CNV_WITH_FEFF) {
    507         // test if the output charset emits a signature anyway
    508         const UChar a[1] = { 0x61 }; // "a"
    509         const UChar *in;
    510 
    511         char buffer[20];
    512         char *out;
    513 
    514         in = a;
    515         out = buffer;
    516         err = U_ZERO_ERROR;
    517         ucnv_fromUnicode(cnv,
    518             &out, buffer + sizeof(buffer),
    519             &in, a + 1,
    520             NULL, TRUE, &err);
    521         ucnv_resetFromUnicode(cnv);
    522 
    523         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
    524             U_SUCCESS(err)
    525         ) {
    526             result = CNV_ADDS_FEFF;
    527         }
    528     }
    529 
    530     return result;
    531 }
    532 
    533 class ConvertFile {
    534 public:
    535     ConvertFile() :
    536         buf(NULL), outbuf(NULL), fromoffsets(NULL),
    537         bufsz(0), signature(0) {}
    538 
    539     void
    540     setBufferSize(size_t bufferSize) {
    541         bufsz = bufferSize;
    542 
    543         buf = new char[2 * bufsz];
    544         outbuf = buf + bufsz;
    545 
    546         // +1 for an added U+FEFF in the intermediate Unicode buffer
    547         fromoffsets = new int32_t[bufsz + 1];
    548     }
    549 
    550     ~ConvertFile() {
    551         delete [] buf;
    552         delete [] fromoffsets;
    553     }
    554 
    555     UBool convertFile(const char *pname,
    556                       const char *fromcpage,
    557                       UConverterToUCallback toucallback,
    558                       const void *touctxt,
    559                       const char *tocpage,
    560                       UConverterFromUCallback fromucallback,
    561                       const void *fromuctxt,
    562                       UBool fallback,
    563                       const char *translit,
    564                       const char *infilestr,
    565                       FILE * outfile, int verbose);
    566 private:
    567     friend int main(int argc, char **argv);
    568 
    569     char *buf, *outbuf;
    570     int32_t *fromoffsets;
    571 
    572     size_t bufsz;
    573     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
    574 };
    575 
    576 // Convert a file from one encoding to another
    577 UBool
    578 ConvertFile::convertFile(const char *pname,
    579                          const char *fromcpage,
    580                          UConverterToUCallback toucallback,
    581                          const void *touctxt,
    582                          const char *tocpage,
    583                          UConverterFromUCallback fromucallback,
    584                          const void *fromuctxt,
    585                          UBool fallback,
    586                          const char *translit,
    587                          const char *infilestr,
    588                          FILE * outfile, int verbose)
    589 {
    590     FILE *infile;
    591     UBool ret = TRUE;
    592     UConverter *convfrom = 0;
    593     UConverter *convto = 0;
    594     UErrorCode err = U_ZERO_ERROR;
    595     UBool flush;
    596     const char *cbufp, *prevbufp;
    597     char *bufp;
    598 
    599     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
    600 
    601     const UChar *unibuf, *unibufbp;
    602     UChar *unibufp;
    603 
    604     size_t rd, wr;
    605 
    606 #if !UCONFIG_NO_TRANSLITERATION
    607     Transliterator *t = 0;      // Transliterator acting on Unicode data.
    608     UnicodeString chunk;        // One chunk of the text being collected for transformation.
    609 #endif
    610     UnicodeString u;            // String to do the transliteration.
    611     int32_t ulen;
    612 
    613     // use conversion offsets for error messages
    614     // unless a transliterator is used -
    615     // a text transformation will reorder characters in unpredictable ways
    616     UBool useOffsets = TRUE;
    617 
    618     // Open the correct input file or connect to stdin for reading input
    619 
    620     if (infilestr != 0 && strcmp(infilestr, "-")) {
    621         infile = fopen(infilestr, "rb");
    622         if (infile == 0) {
    623             UnicodeString str1(infilestr, "");
    624             str1.append((UChar32) 0);
    625             UnicodeString str2(strerror(errno), "");
    626             str2.append((UChar32) 0);
    627             initMsg(pname);
    628             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
    629             return FALSE;
    630         }
    631     } else {
    632         infilestr = "-";
    633         infile = stdin;
    634 #ifdef USE_FILENO_BINARY_MODE
    635         if (setmode(fileno(stdin), O_BINARY) == -1) {
    636             initMsg(pname);
    637             u_wmsg(stderr, "cantSetInBinMode");
    638             return FALSE;
    639         }
    640 #endif
    641     }
    642 
    643     if (verbose) {
    644         fprintf(stderr, "%s:\n", infilestr);
    645     }
    646 
    647 #if !UCONFIG_NO_TRANSLITERATION
    648     // Create transliterator as needed.
    649 
    650     if (translit != NULL && *translit) {
    651         UParseError parse;
    652         UnicodeString str(translit), pestr;
    653 
    654         /* Create from rules or by ID as needed. */
    655 
    656         parse.line = -1;
    657 
    658         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
    659             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
    660         } else {
    661             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
    662         }
    663 
    664         if (U_FAILURE(err)) {
    665             str.append((UChar32) 0);
    666             initMsg(pname);
    667 
    668             if (parse.line >= 0) {
    669                 UChar linebuf[20], offsetbuf[20];
    670                 uprv_itou(linebuf, 20, parse.line, 10, 0);
    671                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
    672                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
    673                     u_wmsg_errorName(err), linebuf, offsetbuf);
    674             } else {
    675                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
    676                     u_wmsg_errorName(err));
    677             }
    678 
    679             if (t) {
    680                 delete t;
    681                 t = 0;
    682             }
    683             goto error_exit;
    684         }
    685 
    686         useOffsets = FALSE;
    687     }
    688 #endif
    689 
    690     // Create codepage converter. If the codepage or its aliases weren't
    691     // available, it returns NULL and a failure code. We also set the
    692     // callbacks, and return errors in the same way.
    693 
    694     convfrom = ucnv_open(fromcpage, &err);
    695     if (U_FAILURE(err)) {
    696         UnicodeString str(fromcpage, "");
    697         initMsg(pname);
    698         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
    699             u_wmsg_errorName(err));
    700         goto error_exit;
    701     }
    702     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    703     if (U_FAILURE(err)) {
    704         initMsg(pname);
    705         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    706         goto error_exit;
    707     }
    708 
    709     convto = ucnv_open(tocpage, &err);
    710     if (U_FAILURE(err)) {
    711         UnicodeString str(tocpage, "");
    712         initMsg(pname);
    713         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
    714             u_wmsg_errorName(err));
    715         goto error_exit;
    716     }
    717     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    718     if (U_FAILURE(err)) {
    719         initMsg(pname);
    720         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
    721         goto error_exit;
    722     }
    723     ucnv_setFallback(convto, fallback);
    724 
    725     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
    726     int8_t sig;
    727 
    728     // OK, we can convert now.
    729     sig = signature;
    730     rd = 0;
    731 
    732     do {
    733         willexit = FALSE;
    734 
    735         // input file offset at the beginning of the next buffer
    736         infoffset += rd;
    737 
    738         rd = fread(buf, 1, bufsz, infile);
    739         if (ferror(infile) != 0) {
    740             UnicodeString str(strerror(errno));
    741             initMsg(pname);
    742             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
    743             goto error_exit;
    744         }
    745 
    746         // Convert the read buffer into the new encoding via Unicode.
    747         // After the call 'unibufp' will be placed behind the last
    748         // character that was converted in the 'unibuf'.
    749         // Also the 'cbufp' is positioned behind the last converted
    750         // character.
    751         // At the last conversion in the file, flush should be set to
    752         // true so that we get all characters converted.
    753         //
    754         // The converter must be flushed at the end of conversion so
    755         // that characters on hold also will be written.
    756 
    757         cbufp = buf;
    758         flush = (UBool)(rd != bufsz);
    759 
    760         // convert until the input is consumed
    761         do {
    762             // remember the start of the current byte-to-Unicode conversion
    763             prevbufp = cbufp;
    764 
    765             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
    766 
    767             // Use bufsz instead of u.getCapacity() for the targetLimit
    768             // so that we don't overflow fromoffsets[].
    769             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
    770                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
    771 
    772             ulen = (int32_t)(unibufp - unibuf);
    773             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
    774 
    775             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
    776             // converting all of the input bytes.
    777             // It works like this because ucnv_toUnicode() returns only under the
    778             // following conditions:
    779             // - an error occurred during conversion (an error code is set)
    780             // - the target buffer is filled (the error code indicates an overflow)
    781             // - the source is consumed
    782             // That is, if the error code does not indicate a failure,
    783             // not even an overflow, then the source must be consumed entirely.
    784             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
    785 
    786             if (err == U_BUFFER_OVERFLOW_ERROR) {
    787                 err = U_ZERO_ERROR;
    788             } else if (U_FAILURE(err)) {
    789                 char pos[32], errorBytes[32];
    790                 int8_t i, length, errorLength;
    791 
    792                 UErrorCode localError = U_ZERO_ERROR;
    793                 errorLength = (int8_t)sizeof(errorBytes);
    794                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
    795                 if (U_FAILURE(localError) || errorLength == 0) {
    796                     errorLength = 1;
    797                 }
    798 
    799                 // print the input file offset of the start of the error bytes:
    800                 // input file offset of the current byte buffer +
    801                 // length of the just consumed bytes -
    802                 // length of the error bytes
    803                 length =
    804                     (int8_t)sprintf(pos, "%d",
    805                         (int)(infoffset + (cbufp - buf) - errorLength));
    806 
    807                 // output the bytes that caused the error
    808                 UnicodeString str;
    809                 for (i = 0; i < errorLength; ++i) {
    810                     if (i > 0) {
    811                         str.append((UChar)uSP);
    812                     }
    813                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
    814                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
    815                 }
    816 
    817                 initMsg(pname);
    818                 u_wmsg(stderr, "problemCvtToU",
    819                         UnicodeString(pos, length, "").getTerminatedBuffer(),
    820                         str.getTerminatedBuffer(),
    821                         u_wmsg_errorName(err));
    822 
    823                 willexit = TRUE;
    824                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
    825             }
    826 
    827             // Replaced a check for whether the input was consumed by
    828             // looping until it is; message key "premEndInput" now obsolete.
    829 
    830             if (ulen == 0) {
    831                 continue;
    832             }
    833 
    834             // remove a U+FEFF Unicode signature character if requested
    835             if (sig < 0) {
    836                 if (u.charAt(0) == uSig) {
    837                     u.remove(0, 1);
    838 
    839                     // account for the removed UChar and offset
    840                     --ulen;
    841 
    842                     if (useOffsets) {
    843                         // remove an offset from fromoffsets[] as well
    844                         // to keep the array parallel with the UChars
    845                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
    846                     }
    847 
    848                 }
    849                 sig = 0;
    850             }
    851 
    852 #if !UCONFIG_NO_TRANSLITERATION
    853             // Transliterate/transform if needed.
    854 
    855             // For transformation, we use chunking code -
    856             // collect Unicode input until, for example, an end-of-line,
    857             // then transform and output-convert that and continue collecting.
    858             // This makes the transformation result independent of the buffer size
    859             // while avoiding the slower keyboard mode.
    860             // The end-of-chunk characters are completely included in the
    861             // transformed string in case they are to be transformed themselves.
    862             if (t != NULL) {
    863                 UnicodeString out;
    864                 int32_t chunkLimit;
    865 
    866                 do {
    867                     chunkLimit = getChunkLimit(chunk, u);
    868                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
    869                         // use all of the rest at the end of the text
    870                         chunkLimit = u.length();
    871                     }
    872                     if (chunkLimit >= 0) {
    873                         // complete the chunk and transform it
    874                         chunk.append(u, 0, chunkLimit);
    875                         u.remove(0, chunkLimit);
    876                         t->transliterate(chunk);
    877 
    878                         // append the transformation result to the result and empty the chunk
    879                         out.append(chunk);
    880                         chunk.remove();
    881                     } else {
    882                         // continue collecting the chunk
    883                         chunk.append(u);
    884                         break;
    885                     }
    886                 } while (!u.isEmpty());
    887 
    888                 u = out;
    889                 ulen = u.length();
    890             }
    891 #endif
    892 
    893             // add a U+FEFF Unicode signature character if requested
    894             // and possible/necessary
    895             if (sig > 0) {
    896                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
    897                     u.insert(0, (UChar)uSig);
    898 
    899                     if (useOffsets) {
    900                         // insert a pseudo-offset into fromoffsets[] as well
    901                         // to keep the array parallel with the UChars
    902                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
    903                         fromoffsets[0] = -1;
    904                     }
    905 
    906                     // account for the additional UChar and offset
    907                     ++ulen;
    908                 }
    909                 sig = 0;
    910             }
    911 
    912             // Convert the Unicode buffer into the destination codepage
    913             // Again 'bufp' will be placed behind the last converted character
    914             // And 'unibufp' will be placed behind the last converted unicode character
    915             // At the last conversion flush should be set to true to ensure that
    916             // all characters left get converted
    917 
    918             unibuf = unibufbp = u.getBuffer();
    919 
    920             do {
    921                 bufp = outbuf;
    922 
    923                 // Use fromSawEndOfBytes in addition to the flush flag -
    924                 // it indicates whether the intermediate Unicode string
    925                 // contains the very last UChars for the very last input bytes.
    926                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
    927                                  &unibufbp,
    928                                  unibuf + ulen,
    929                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
    930 
    931                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
    932                 // converting all of the intermediate UChars.
    933                 // See comment for fromSawEndOfBytes.
    934                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
    935 
    936                 if (err == U_BUFFER_OVERFLOW_ERROR) {
    937                     err = U_ZERO_ERROR;
    938                 } else if (U_FAILURE(err)) {
    939                     UChar errorUChars[4];
    940                     const char *errtag;
    941                     char pos[32];
    942                     UChar32 c;
    943                     int8_t i, length, errorLength;
    944 
    945                     UErrorCode localError = U_ZERO_ERROR;
    946                     errorLength = (int8_t)LENGTHOF(errorUChars);
    947                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
    948                     if (U_FAILURE(localError) || errorLength == 0) {
    949                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
    950                         errorLength = 1;
    951                     }
    952 
    953                     int32_t ferroffset;
    954 
    955                     if (useOffsets) {
    956                         // Unicode buffer offset of the start of the error UChars
    957                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
    958                         if (ferroffset < 0) {
    959                             // approximation - the character started in the previous Unicode buffer
    960                             ferroffset = 0;
    961                         }
    962 
    963                         // get the corresponding byte offset out of fromoffsets[]
    964                         // go back if the offset is not known for some of the UChars
    965                         int32_t fromoffset;
    966                         do {
    967                             fromoffset = fromoffsets[ferroffset];
    968                         } while (fromoffset < 0 && --ferroffset >= 0);
    969 
    970                         // total input file offset =
    971                         // input file offset of the current byte buffer +
    972                         // byte buffer offset of where the current Unicode buffer is converted from +
    973                         // fromoffsets[Unicode offset]
    974                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
    975                         errtag = "problemCvtFromU";
    976                     } else {
    977                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
    978                         // be different from what the offsets refer to.
    979 
    980                         // output file offset
    981                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
    982                         errtag = "problemCvtFromUOut";
    983                     }
    984 
    985                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
    986 
    987                     // output the code points that caused the error
    988                     UnicodeString str;
    989                     for (i = 0; i < errorLength;) {
    990                         if (i > 0) {
    991                             str.append((UChar)uSP);
    992                         }
    993                         U16_NEXT(errorUChars, i, errorLength, c);
    994                         if (c >= 0x100000) {
    995                             str.append(nibbleToHex((uint8_t)(c >> 20)));
    996                         }
    997                         if (c >= 0x10000) {
    998                             str.append(nibbleToHex((uint8_t)(c >> 16)));
    999                         }
   1000                         str.append(nibbleToHex((uint8_t)(c >> 12)));
   1001                         str.append(nibbleToHex((uint8_t)(c >> 8)));
   1002                         str.append(nibbleToHex((uint8_t)(c >> 4)));
   1003                         str.append(nibbleToHex((uint8_t)c));
   1004                     }
   1005 
   1006                     initMsg(pname);
   1007                     u_wmsg(stderr, errtag,
   1008                             UnicodeString(pos, length, "").getTerminatedBuffer(),
   1009                             str.getTerminatedBuffer(),
   1010                            u_wmsg_errorName(err));
   1011                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
   1012 
   1013                     willexit = TRUE;
   1014                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
   1015                 }
   1016 
   1017                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
   1018                 // looping until they are; message key "premEnd" now obsolete.
   1019 
   1020                 // Finally, write the converted buffer to the output file
   1021                 size_t outlen = (size_t) (bufp - outbuf);
   1022                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
   1023                 if (wr != outlen) {
   1024                     UnicodeString str(strerror(errno));
   1025                     initMsg(pname);
   1026                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
   1027                     willexit = TRUE;
   1028                 }
   1029 
   1030                 if (willexit) {
   1031                     goto error_exit;
   1032                 }
   1033             } while (!toSawEndOfUnicode);
   1034         } while (!fromSawEndOfBytes);
   1035     } while (!flush);           // Stop when we have flushed the
   1036                                 // converters (this means that it's
   1037                                 // the end of output)
   1038 
   1039     goto normal_exit;
   1040 
   1041 error_exit:
   1042     ret = FALSE;
   1043 
   1044 normal_exit:
   1045     // Cleanup.
   1046 
   1047     ucnv_close(convfrom);
   1048     ucnv_close(convto);
   1049 
   1050 #if !UCONFIG_NO_TRANSLITERATION
   1051     delete t;
   1052 #endif
   1053 
   1054     if (infile != stdin) {
   1055         fclose(infile);
   1056     }
   1057 
   1058     return ret;
   1059 }
   1060 
   1061 static void usage(const char *pname, int ecode) {
   1062     const UChar *msg;
   1063     int32_t msgLen;
   1064     UErrorCode err = U_ZERO_ERROR;
   1065     FILE *fp = ecode ? stderr : stdout;
   1066     int res;
   1067 
   1068     initMsg(pname);
   1069     msg =
   1070         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
   1071                             &msgLen, &err);
   1072     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
   1073     UnicodeString mname(msg, msgLen + 1);
   1074 
   1075     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
   1076     if (!ecode) {
   1077         if (!res) {
   1078             fputc('\n', fp);
   1079         }
   1080         if (!u_wmsg(fp, "help")) {
   1081             /* Now dump callbacks and finish. */
   1082 
   1083             int i, count =
   1084                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
   1085             for (i = 0; i < count; ++i) {
   1086                 fprintf(fp, " %s", transcode_callbacks[i].name);
   1087             }
   1088             fputc('\n', fp);
   1089         }
   1090     }
   1091 
   1092     exit(ecode);
   1093 }
   1094 
   1095 extern int
   1096 main(int argc, char **argv)
   1097 {
   1098     FILE *outfile;
   1099     int ret = 0;
   1100 
   1101     size_t bufsz = DEFAULT_BUFSZ;
   1102 
   1103     const char *fromcpage = 0;
   1104     const char *tocpage = 0;
   1105     const char *translit = 0;
   1106     const char *outfilestr = 0;
   1107     UBool fallback = FALSE;
   1108 
   1109     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
   1110     const void *fromuctxt = 0;
   1111     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
   1112     const void *touctxt = 0;
   1113 
   1114     char **iter, **remainArgv, **remainArgvLimit;
   1115     char **end = argv + argc;
   1116 
   1117     const char *pname;
   1118 
   1119     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
   1120     const char *printName = 0;
   1121 
   1122     UBool verbose = FALSE;
   1123     UErrorCode status = U_ZERO_ERROR;
   1124 
   1125     ConvertFile cf;
   1126 
   1127     /* Initialize ICU */
   1128     u_init(&status);
   1129     if (U_FAILURE(status)) {
   1130         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
   1131             argv[0], u_errorName(status));
   1132         exit(1);
   1133     }
   1134 
   1135     // Get and prettify pname.
   1136     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
   1137 #ifdef U_WINDOWS
   1138     if (!pname) {
   1139         pname = uprv_strrchr(*argv, '/');
   1140     }
   1141 #endif
   1142     if (!pname) {
   1143         pname = *argv;
   1144     } else {
   1145         ++pname;
   1146     }
   1147 
   1148     // First, get the arguments from command-line
   1149     // to know the codepages to convert between
   1150 
   1151     remainArgv = remainArgvLimit = argv + 1;
   1152     for (iter = argv + 1; iter != end; iter++) {
   1153         // Check for from charset
   1154         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
   1155             iter++;
   1156             if (iter != end)
   1157                 fromcpage = *iter;
   1158             else
   1159                 usage(pname, 1);
   1160         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
   1161             iter++;
   1162             if (iter != end)
   1163                 tocpage = *iter;
   1164             else
   1165                 usage(pname, 1);
   1166         } else if (strcmp("-x", *iter) == 0) {
   1167             iter++;
   1168             if (iter != end)
   1169                 translit = *iter;
   1170             else
   1171                 usage(pname, 1);
   1172         } else if (!strcmp("--fallback", *iter)) {
   1173             fallback = TRUE;
   1174         } else if (!strcmp("--no-fallback", *iter)) {
   1175             fallback = FALSE;
   1176         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
   1177             iter++;
   1178             if (iter != end) {
   1179                 bufsz = atoi(*iter);
   1180                 if ((int) bufsz <= 0) {
   1181                     initMsg(pname);
   1182                     UnicodeString str(*iter);
   1183                     initMsg(pname);
   1184                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
   1185                     return 3;
   1186                 }
   1187             } else {
   1188                 usage(pname, 1);
   1189             }
   1190         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
   1191             if (printTranslits) {
   1192                 usage(pname, 1);
   1193             }
   1194             printConvs = TRUE;
   1195         } else if (strcmp("--default-code", *iter) == 0) {
   1196             if (printTranslits) {
   1197                 usage(pname, 1);
   1198             }
   1199             printName = ucnv_getDefaultName();
   1200         } else if (strcmp("--list-code", *iter) == 0) {
   1201             if (printTranslits) {
   1202                 usage(pname, 1);
   1203             }
   1204 
   1205             iter++;
   1206             if (iter != end) {
   1207                 UErrorCode e = U_ZERO_ERROR;
   1208                 printName = ucnv_getAlias(*iter, 0, &e);
   1209                 if (U_FAILURE(e) || !printName) {
   1210                     UnicodeString str(*iter);
   1211                     initMsg(pname);
   1212                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
   1213                     return 2;
   1214                 }
   1215             } else
   1216                 usage(pname, 1);
   1217         } else if (strcmp("--canon", *iter) == 0) {
   1218             printCanon = TRUE;
   1219         } else if (strcmp("-L", *iter) == 0
   1220             || !strcmp("--list-transliterators", *iter)) {
   1221             if (printConvs) {
   1222                 usage(pname, 1);
   1223             }
   1224             printTranslits = TRUE;
   1225         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
   1226             || !strcmp("--help", *iter)) {
   1227             usage(pname, 0);
   1228         } else if (!strcmp("-c", *iter)) {
   1229             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
   1230         } else if (!strcmp("--to-callback", *iter)) {
   1231             iter++;
   1232             if (iter != end) {
   1233                 const struct callback_ent *cbe = findCallback(*iter);
   1234                 if (cbe) {
   1235                     fromucallback = cbe->fromu;
   1236                     fromuctxt = cbe->fromuctxt;
   1237                 } else {
   1238                     UnicodeString str(*iter);
   1239                     initMsg(pname);
   1240                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1241                     return 4;
   1242                 }
   1243             } else {
   1244                 usage(pname, 1);
   1245             }
   1246         } else if (!strcmp("--from-callback", *iter)) {
   1247             iter++;
   1248             if (iter != end) {
   1249                 const struct callback_ent *cbe = findCallback(*iter);
   1250                 if (cbe) {
   1251                     toucallback = cbe->tou;
   1252                     touctxt = cbe->touctxt;
   1253                 } else {
   1254                     UnicodeString str(*iter);
   1255                     initMsg(pname);
   1256                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1257                     return 4;
   1258                 }
   1259             } else {
   1260                 usage(pname, 1);
   1261             }
   1262         } else if (!strcmp("-i", *iter)) {
   1263             toucallback = UCNV_TO_U_CALLBACK_SKIP;
   1264         } else if (!strcmp("--callback", *iter)) {
   1265             iter++;
   1266             if (iter != end) {
   1267                 const struct callback_ent *cbe = findCallback(*iter);
   1268                 if (cbe) {
   1269                     fromucallback = cbe->fromu;
   1270                     fromuctxt = cbe->fromuctxt;
   1271                     toucallback = cbe->tou;
   1272                     touctxt = cbe->touctxt;
   1273                 } else {
   1274                     UnicodeString str(*iter);
   1275                     initMsg(pname);
   1276                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
   1277                     return 4;
   1278                 }
   1279             } else {
   1280                 usage(pname, 1);
   1281             }
   1282         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
   1283             verbose = FALSE;
   1284         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
   1285             verbose = TRUE;
   1286         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
   1287             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
   1288             return 0;
   1289         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
   1290             ++iter;
   1291             if (iter != end && !outfilestr) {
   1292                 outfilestr = *iter;
   1293             } else {
   1294                 usage(pname, 1);
   1295             }
   1296         } else if (0 == strcmp("--add-signature", *iter)) {
   1297             cf.signature = 1;
   1298         } else if (0 == strcmp("--remove-signature", *iter)) {
   1299             cf.signature = -1;
   1300         } else if (**iter == '-' && (*iter)[1]) {
   1301             usage(pname, 1);
   1302         } else {
   1303             // move a non-option up in argv[]
   1304             *remainArgvLimit++ = *iter;
   1305         }
   1306     }
   1307 
   1308     if (printConvs || printName) {
   1309         return printConverters(pname, printName, printCanon) ? 2 : 0;
   1310     } else if (printTranslits) {
   1311         return printTransliterators(printCanon) ? 3 : 0;
   1312     }
   1313 
   1314     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
   1315         fromcpage = ucnv_getDefaultName();
   1316     }
   1317     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
   1318         tocpage = ucnv_getDefaultName();
   1319     }
   1320 
   1321     // Open the correct output file or connect to stdout for reading input
   1322     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
   1323         outfile = fopen(outfilestr, "wb");
   1324         if (outfile == 0) {
   1325             UnicodeString str1(outfilestr, "");
   1326             UnicodeString str2(strerror(errno), "");
   1327             initMsg(pname);
   1328             u_wmsg(stderr, "cantCreateOutputF",
   1329                 str1.getBuffer(), str2.getBuffer());
   1330             return 1;
   1331         }
   1332     } else {
   1333         outfilestr = "-";
   1334         outfile = stdout;
   1335 #ifdef USE_FILENO_BINARY_MODE
   1336         if (setmode(fileno(outfile), O_BINARY) == -1) {
   1337             u_wmsg(stderr, "cantSetOutBinMode");
   1338             exit(-1);
   1339         }
   1340 #endif
   1341     }
   1342 
   1343     /* Loop again on the arguments to find all the input files, and
   1344     convert them. */
   1345 
   1346     cf.setBufferSize(bufsz);
   1347 
   1348     if(remainArgv < remainArgvLimit) {
   1349         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
   1350             if (!cf.convertFile(
   1351                     pname, fromcpage, toucallback, touctxt, tocpage,
   1352                     fromucallback, fromuctxt, fallback, translit, *iter,
   1353                     outfile, verbose)
   1354             ) {
   1355                 goto error_exit;
   1356             }
   1357         }
   1358     } else {
   1359         if (!cf.convertFile(
   1360                 pname, fromcpage, toucallback, touctxt, tocpage,
   1361                 fromucallback, fromuctxt, fallback, translit, 0,
   1362                 outfile, verbose)
   1363         ) {
   1364             goto error_exit;
   1365         }
   1366     }
   1367 
   1368     goto normal_exit;
   1369 error_exit:
   1370 #if !UCONFIG_NO_LEGACY_CONVERSION
   1371     ret = 1;
   1372 #else
   1373     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
   1374 #endif
   1375 normal_exit:
   1376 
   1377     if (outfile != stdout) {
   1378         fclose(outfile);
   1379     }
   1380 
   1381     return ret;
   1382 }
   1383 
   1384 
   1385 /*
   1386  * Hey, Emacs, please set the following:
   1387  *
   1388  * Local Variables:
   1389  * indent-tabs-mode: nil
   1390  * End:
   1391  *
   1392  */
   1393