Home | History | Annotate | Download | only in genrb
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2002-2009, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File wrtxml.cpp
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   10/01/02    Ram         Creation.
     15 *   02/07/08    Spieth      Correct XLIFF generation on EBCDIC platform
     16 *
     17 *******************************************************************************
     18 */
     19 #include "reslist.h"
     20 #include "unewdata.h"
     21 #include "unicode/ures.h"
     22 #include "errmsg.h"
     23 #include "filestrm.h"
     24 #include "cstring.h"
     25 #include "unicode/ucnv.h"
     26 #include "genrb.h"
     27 #include "rle.h"
     28 #include "ucol_tok.h"
     29 #include "uhash.h"
     30 #include "uresimp.h"
     31 #include "unicode/ustring.h"
     32 #include "unicode/uchar.h"
     33 #include "ustr.h"
     34 #include "prscmnts.h"
     35 #include "unicode/unistr.h"
     36 #include <time.h>
     37 
     38 U_NAMESPACE_USE
     39 
/* Current nesting depth of the output; write_tabs() emits (tabCount + 1) indents. */
static int tabCount = 0;

/* Stream that the writer helpers in this file send their output to. */
static FileStream* out=NULL;
/* Bundle handed to res_getKeyString() when resolving a resource's key. */
static struct SRBRoot* srBundle ;
/* Output directory; bin_write_xml() treats NULL as a zero-length path. */
static const char* outDir = NULL;
/* NOTE(review): enc and conv are not referenced in the visible part of this file --
 * presumably the output encoding name and its converter; confirm against the rest of the file. */
static const char* enc ="";
static UConverter* conv = NULL;

/* NOTE(review): ISOLanguages/ISOCountries are not referenced in the visible part of this file. */
const char* const* ISOLanguages;
const char* const* ISOCountries;
/* File-name extensions for text and XLIFF files. */
const char* textExt = ".txt";
const char* xliffExt = ".xlf";
     52 
     53 static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
     54 {
     55     UErrorCode status = U_ZERO_ERROR;
     56     int32_t len = 0;
     57 
     58     // preflight to get the destination buffer size
     59     u_strToUTF8(NULL,
     60                 0,
     61                 &len,
     62                 outString.getBuffer(),
     63                 outString.length(),
     64                 &status);
     65 
     66     // allocate the buffer
     67     char* dest = (char*)uprv_malloc(len);
     68     status = U_ZERO_ERROR;
     69 
     70     // convert the data
     71     u_strToUTF8(dest,
     72                 len,
     73                 &len,
     74                 outString.getBuffer(),
     75                 outString.length(),
     76                 &status);
     77 
     78     // write data to out file
     79     int32_t ret = T_FileStream_write(fileStream, dest, len);
     80     uprv_free(dest);
     81     return (ret);
     82 }
     83 
     84 /*write indentation for formatting*/
     85 static void write_tabs(FileStream* os){
     86     int i=0;
     87     for(;i<=tabCount;i++){
     88         write_utf8_file(os,UnicodeString("    "));
     89     }
     90 }
     91 
     92 /*get ID for each element. ID is globally unique.*/
     93 static char* getID(const char* id, const char* curKey, char* result) {
     94     if(curKey == NULL) {
     95         result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
     96         uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
     97         uprv_strcpy(result, id);
     98     } else {
     99         result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
    100         uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
    101         if(id[0]!='\0'){
    102             uprv_strcpy(result, id);
    103             uprv_strcat(result, "_");
    104         }
    105         uprv_strcat(result, curKey);
    106     }
    107     return result;
    108 }
    109 
    110 /*compute CRC for binary code*/
    111 /* The code is from  http://www.theorem.com/java/CRC32.java
    112  * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
    113  * <P> This check is used in numerous systems to verify the integrity
    114  * of information.  It's also used as a hashing function.  Unlike a regular
    115  * checksum, it's sensitive to the order of the characters.
 * It produces a 32-bit value.
    117  *
    118  * @author Michael Lecuyer (mjl (at) theorem.com)
    119  * @version 1.1 August 11, 1998
    120  */
    121 
/* ICU data is not endian portable: data generated on a big-endian machine can be
 * used on other big-endian machines but not on little-endian machines, and vice versa.
 * The conversion is not portable across platforms with different endianness.
 */
    126 
    127 uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc){
    128     int32_t crc;
    129     uint32_t temp1;
    130     uint32_t temp2;
    131 
    132     int32_t crc_ta[256];
    133     int i = 0;
    134     int j = 0;
    135     uint32_t crc2 = 0;
    136 
    137 #define CRC32_POLYNOMIAL 0xEDB88320
    138 
    139     /*build crc table*/
    140     for (i = 0; i <= 255; i++) {
    141         crc2 = i;
    142         for (j = 8; j > 0; j--) {
    143             if ((crc2 & 1) == 1) {
    144                 crc2 = (crc2 >> 1) ^ CRC32_POLYNOMIAL;
    145             } else {
    146                 crc2 >>= 1;
    147             }
    148         }
    149         crc_ta[i] = crc2;
    150     }
    151 
    152     crc = lastcrc;
    153     while(len--!=0) {
    154         temp1 = (uint32_t)crc>>8;
    155         temp2 = crc_ta[(crc^*ptr) & 0xFF];
    156         crc = temp1^temp2;
    157         ptr++;
    158     }
    159     return(crc);
    160 }
    161 
    162 static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    163     int32_t i = 0;
    164     for(i=0;i<srcLen;i++){
    165         if(src[i]==s){
    166             src[i]=r;
    167         }
    168     }
    169 }
    170 /* Parse the filename, and get its language information.
    171  * If it fails to get the language information from the filename,
    172  * use "en" as the default value for language
    173  */
    174 static char* parseFilename(const char* id, char* /*lang*/) {
    175     int idLen = (int) uprv_strlen(id);
    176     char* localeID = (char*) uprv_malloc(idLen);
    177     int pos = 0;
    178     int canonCapacity = 0;
    179     char* canon = NULL;
    180     int canonLen = 0;
    181     /*int i;*/
    182     UErrorCode status = U_ZERO_ERROR;
    183     const char *ext = uprv_strchr(id, '.');
    184 
    185     if(ext != NULL){
    186         pos = (int) (ext - id);
    187     } else {
    188         pos = idLen;
    189     }
    190     uprv_memcpy(localeID, id, pos);
    191     localeID[pos]=0; /* NUL terminate the string */
    192 
    193     canonCapacity =pos*3;
    194     canon = (char*) uprv_malloc(canonCapacity);
    195     canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
    196 
    197     if(U_FAILURE(status)){
    198         fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
    199         exit(status);
    200     }
    201     strnrepchr(canon, canonLen, '_', '-');
    202     return canon;
    203 }
    204 
/* XML declaration; it states that the document is encoded as UTF-8. */
static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
/* Opening <xliff> tag. The XLIFF 1.2 variant is compiled out; the 1.1 variant is the one in use. */
#if 0
static const char* bundleStart = "<xliff version = \"1.2\" "
                                        "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
                                        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                        "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
#else
static const char* bundleStart = "<xliff version = \"1.1\" "
                                        "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
                                        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                        "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
#endif
/* Closing </xliff> tag matching bundleStart. */
static const char* bundleEnd   = "</xliff>\n";

/* Forward declaration: the array writer below calls this for each element of an array. */
void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
    220 
    221 static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
    222                               const UChar* src, int32_t srcLen, UErrorCode* status){
    223     int32_t srcIndex=0;
    224     char* dest=NULL;
    225     char* temp=NULL;
    226     int32_t destLen=0;
    227     UChar32 c = 0;
    228 
    229     if(status==NULL || U_FAILURE(*status) || pDest==NULL  || srcLen==0 || src == NULL){
    230         return NULL;
    231     }
    232     dest =*pDest;
    233     if(dest==NULL || destCap <=0){
    234         destCap = srcLen * 8;
    235         dest = (char*) uprv_malloc(sizeof(char) * destCap);
    236         if(dest==NULL){
    237             *status=U_MEMORY_ALLOCATION_ERROR;
    238             return NULL;
    239         }
    240     }
    241 
    242     dest[0]=0;
    243 
    244     while(srcIndex<srcLen){
    245         U16_NEXT(src, srcIndex, srcLen, c);
    246 
    247         if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
    248             *status = U_ILLEGAL_CHAR_FOUND;
    249             fprintf(stderr, "Illegal Surrogate! \n");
    250             uprv_free(dest);
    251             return NULL;
    252         }
    253 
    254         if((destLen+UTF8_CHAR_LENGTH(c)) < destCap){
    255 
    256             /* ASCII Range */
    257             if(c <=0x007F){
    258                 switch(c) {
    259                 case '\x26':
    260                     uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
    261                     destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
    262                     break;
    263                 case '\x3c':
    264                     uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
    265                     destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
    266                     break;
    267                 case '\x3e':
    268                     uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
    269                     destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
    270                     break;
    271                 case '\x22':
    272                     uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
    273                     destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
    274                     break;
    275                 case '\x27':
    276                     uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
    277                     destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
    278                     break;
    279 
    280                  /* Disallow C0 controls except TAB, CR, LF*/
    281                 case 0x00:
    282                 case 0x01:
    283                 case 0x02:
    284                 case 0x03:
    285                 case 0x04:
    286                 case 0x05:
    287                 case 0x06:
    288                 case 0x07:
    289                 case 0x08:
    290                 /*case 0x09:*/
    291                 /*case 0x0A: */
    292                 case 0x0B:
    293                 case 0x0C:
    294                 /*case 0x0D:*/
    295                 case 0x0E:
    296                 case 0x0F:
    297                 case 0x10:
    298                 case 0x11:
    299                 case 0x12:
    300                 case 0x13:
    301                 case 0x14:
    302                 case 0x15:
    303                 case 0x16:
    304                 case 0x17:
    305                 case 0x18:
    306                 case 0x19:
    307                 case 0x1A:
    308                 case 0x1B:
    309                 case 0x1C:
    310                 case 0x1D:
    311                 case 0x1E:
    312                 case 0x1F:
    313                     *status = U_ILLEGAL_CHAR_FOUND;
    314                     fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
    315                     uprv_free(dest);
    316                     return NULL;
    317                 default:
    318                     dest[destLen++]=(char)c;
    319                 }
    320             }else{
    321                 UBool isError = FALSE;
    322                 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
    323                 if(isError){
    324                     *status = U_ILLEGAL_CHAR_FOUND;
    325                     fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
    326                     uprv_free(dest);
    327                     return NULL;
    328                 }
    329             }
    330         }else{
    331             destCap += destLen;
    332 
    333             temp = (char*) uprv_malloc(sizeof(char)*destCap);
    334             if(temp==NULL){
    335                 *status=U_MEMORY_ALLOCATION_ERROR;
    336                 uprv_free(dest);
    337                 return NULL;
    338             }
    339             uprv_memmove(temp,dest,destLen);
    340             destLen=0;
    341             uprv_free(dest);
    342             dest=temp;
    343             temp=NULL;
    344         }
    345 
    346     }
    347     *destLength = destLen;
    348     return dest;
    349 }
    350 
    351 #define ASTERISK 0x002A
    352 #define SPACE    0x0020
    353 #define CR       0x000A
    354 #define LF       0x000D
    355 #define AT_SIGN  0x0040
    356 
    357 static void
    358 trim(char **src, int32_t *len){
    359 
    360     char *s = NULL;
    361     int32_t i = 0;
    362     if(src == NULL || *src == NULL){
    363         return;
    364     }
    365     s = *src;
    366     /* trim from the end */
    367     for( i=(*len-1); i>= 0; i--){
    368         switch(s[i]){
    369         case ASTERISK:
    370         case SPACE:
    371         case CR:
    372         case LF:
    373             s[i] = 0;
    374             continue;
    375         default:
    376             break;
    377         }
    378         break;
    379 
    380     }
    381     *len = i+1;
    382 }
    383 
    384 static void
    385 print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd,  UErrorCode *status){
    386     int32_t bufCapacity   = srcLen*4;
    387     char *buf       = NULL;
    388     int32_t bufLen = 0;
    389 
    390     if(U_FAILURE(*status)){
    391         return;
    392     }
    393 
    394     buf = (char*) (uprv_malloc(bufCapacity));
    395     if(buf==0){
    396         fprintf(stderr, "Could not allocate memory!!");
    397         exit(U_MEMORY_ALLOCATION_ERROR);
    398     }
    399     buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
    400     if(U_SUCCESS(*status)){
    401         trim(&buf,&bufLen);
    402         write_utf8_file(out,UnicodeString(tagStart));
    403         write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
    404         write_utf8_file(out,UnicodeString(tagEnd));
    405         write_utf8_file(out,UnicodeString("\n"));
    406 
    407     }
    408 }
    409 static void
    410 printNoteElements(struct UString *src, UErrorCode *status){
    411 
    412 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
    413 
    414     int32_t capacity = 0;
    415     UChar* note = NULL;
    416     int32_t noteLen = 0;
    417     int32_t count = 0,i;
    418 
    419     if(src == NULL){
    420         return;
    421     }
    422 
    423     capacity = src->fLength;
    424     note  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
    425 
    426     count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
    427     if(U_FAILURE(*status)){
    428         uprv_free(note);
    429         return;
    430     }
    431     for(i=0; i < count; i++){
    432         noteLen =  getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
    433         if(U_FAILURE(*status)){
    434             uprv_free(note);
    435             return;
    436         }
    437         if(noteLen > 0){
    438             write_tabs(out);
    439             print(note, noteLen,"<note>", "</note>", status);
    440         }
    441     }
    442     uprv_free(note);
    443 #else
    444 
    445     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
    446 
    447 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    448 
    449 }
    450 
    451 static void printAttribute(const char *name, const char *value, int32_t /*len*/)
    452 {
    453     write_utf8_file(out, UnicodeString(" "));
    454     write_utf8_file(out, UnicodeString(name));
    455     write_utf8_file(out, UnicodeString(" = \""));
    456     write_utf8_file(out, UnicodeString(value));
    457     write_utf8_file(out, UnicodeString("\""));
    458 }
    459 
    460 static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
    461 {
    462     write_utf8_file(out, UnicodeString(" "));
    463     write_utf8_file(out, UnicodeString(name));
    464     write_utf8_file(out, UnicodeString(" = \""));
    465     write_utf8_file(out, value);
    466     write_utf8_file(out, UnicodeString("\""));
    467 }
    468 
    469 static void
    470 printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
    471 
    472 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
    473 
    474     if(status==NULL || U_FAILURE(*status)){
    475         return;
    476     }
    477 
    478     int32_t capacity = src->fLength + 1;
    479     char* buf = NULL;
    480     int32_t bufLen = 0;
    481     UChar* desc  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
    482     UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
    483 
    484     int32_t descLen = 0, transLen=0;
    485     if(desc==NULL || trans==NULL){
    486         *status = U_MEMORY_ALLOCATION_ERROR;
    487         uprv_free(desc);
    488         uprv_free(trans);
    489         return;
    490     }
    491     src->fLength = removeCmtText(src->fChars, src->fLength, status);
    492     descLen  = getDescription(src->fChars,src->fLength, &desc, capacity, status);
    493     transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
    494 
    495     /* first print translate attribute */
    496     if(transLen > 0){
    497         if(printTranslate){
    498             /* print translate attribute */
    499             buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
    500             if(U_SUCCESS(*status)){
    501                 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
    502                 write_utf8_file(out,UnicodeString(">\n"));
    503             }
    504         }else if(getShowWarning()){
    505             fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
    506             /* no translate attribute .. just close the tag */
    507             write_utf8_file(out,UnicodeString(">\n"));
    508         }
    509     }else{
    510         /* no translate attribute .. just close the tag */
    511         write_utf8_file(out,UnicodeString(">\n"));
    512     }
    513 
    514     if(descLen > 0){
    515         write_tabs(out);
    516         print(desc, descLen, "<!--", "-->", status);
    517     }
    518 
    519     uprv_free(desc);
    520     uprv_free(trans);
    521 #else
    522 
    523     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
    524 
    525 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    526 
    527 }
    528 
    529 /*
    530  * Print out a containing element, like:
    531  * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
    532  * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
    533  */
    534 static char *printContainer(struct SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
    535 {
    536     char resKeyBuffer[8];
    537     const char *resname = NULL;
    538     char *sid = NULL;
    539 
    540     write_tabs(out);
    541 
    542     resname = res_getKeyString(srBundle, res, resKeyBuffer);
    543     if (resname != NULL && *resname != 0) {
    544         sid = getID(id, resname, sid);
    545     } else {
    546         sid = getID(id, NULL, sid);
    547     }
    548 
    549     write_utf8_file(out, UnicodeString("<"));
    550     write_utf8_file(out, UnicodeString(container));
    551     printAttribute("id", sid, (int32_t) uprv_strlen(sid));
    552 
    553     if (resname != NULL) {
    554         printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
    555     }
    556 
    557     if (mimetype != NULL) {
    558         printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
    559     }
    560 
    561     if (restype != NULL) {
    562         printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
    563     }
    564 
    565     tabCount += 1;
    566     if (res->fComment.fLength > 0) {
    567         /* printComments will print the closing ">\n" */
    568         printComments(&res->fComment, resname, TRUE, status);
    569     } else {
    570         write_utf8_file(out, UnicodeString(">\n"));
    571     }
    572 
    573     return sid;
    574 }
    575 
    576 /* Writing Functions */
    577 
/* Element names (passed to printContainer) and literal tags for text resources and groups. */
static const char *trans_unit = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source = "<source>";
static const char *close_source = "</source>\n";
static const char *group = "group";
static const char *close_group = "</group>\n";

/* Element names and literal tags for binary resources. */
static const char *bin_unit = "bin-unit";
static const char *close_bin_unit = "</bin-unit>\n";
static const char *bin_source = "<bin-source>\n";
static const char *close_bin_source = "</bin-source>\n";
/* external_file is written without a closing '>' so attributes can follow; it is ended with "/>". */
static const char *external_file = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

/* Default mime-type for binary resources whose file extension is not recognized. */
static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per ICU resource type. */
static const char *alias_restype     = "x-icu-alias";
static const char *array_restype     = "x-icu-array";
static const char *binary_restype    = "x-icu-binary";
static const char *integer_restype   = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype     = "x-icu-table";
    602 
    603 static void
    604 string_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    605 
    606     char *sid = NULL;
    607     char* buf = NULL;
    608     int32_t bufLen = 0;
    609 
    610     if(status==NULL || U_FAILURE(*status)){
    611         return;
    612     }
    613 
    614     sid = printContainer(res, trans_unit, NULL, NULL, id, status);
    615 
    616     write_tabs(out);
    617 
    618     write_utf8_file(out, UnicodeString(source));
    619 
    620     buf = convertAndEscape(&buf, 0, &bufLen, res->u.fString.fChars, res->u.fString.fLength, status);
    621 
    622     if (U_FAILURE(*status)) {
    623         return;
    624     }
    625 
    626     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
    627     write_utf8_file(out, UnicodeString(close_source));
    628 
    629     printNoteElements(&res->fComment, status);
    630 
    631     tabCount -= 1;
    632     write_tabs(out);
    633 
    634     write_utf8_file(out, UnicodeString(close_trans_unit));
    635 
    636     uprv_free(buf);
    637     uprv_free(sid);
    638 }
    639 
    640 static void
    641 alias_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    642     char *sid = NULL;
    643     char* buf = NULL;
    644     int32_t bufLen=0;
    645 
    646     sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
    647 
    648     write_tabs(out);
    649 
    650     write_utf8_file(out, UnicodeString(source));
    651 
    652     buf = convertAndEscape(&buf, 0, &bufLen, res->u.fString.fChars, res->u.fString.fLength, status);
    653 
    654     if(U_FAILURE(*status)){
    655         return;
    656     }
    657     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
    658     write_utf8_file(out, UnicodeString(close_source));
    659 
    660     printNoteElements(&res->fComment, status);
    661 
    662     tabCount -= 1;
    663     write_tabs(out);
    664 
    665     write_utf8_file(out, UnicodeString(close_trans_unit));
    666 
    667     uprv_free(buf);
    668     uprv_free(sid);
    669 }
    670 
    671 static void
    672 array_write_xml(struct SResource *res, const char* id, const char* language, UErrorCode *status) {
    673     char* sid = NULL;
    674     int index = 0;
    675 
    676     struct SResource *current = NULL;
    677     struct SResource *first =NULL;
    678 
    679     sid = printContainer(res, group, array_restype, NULL, id, status);
    680 
    681     current = res->u.fArray.fFirst;
    682     first=current;
    683 
    684     while (current != NULL) {
    685         char c[256] = {0};
    686         char* subId = NULL;
    687 
    688         itostr(c, index, 10, 0);
    689         index += 1;
    690         subId = getID(sid, c, subId);
    691 
    692         res_write_xml(current, subId, language, FALSE, status);
    693         uprv_free(subId);
    694         subId = NULL;
    695 
    696         if(U_FAILURE(*status)){
    697             return;
    698         }
    699 
    700         current = current->fNext;
    701     }
    702 
    703     tabCount -= 1;
    704     write_tabs(out);
    705     write_utf8_file(out, UnicodeString(close_group));
    706 
    707     uprv_free(sid);
    708 }
    709 
    710 static void
    711 intvector_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    712     char* sid = NULL;
    713     char* ivd = NULL;
    714     uint32_t i=0;
    715     uint32_t len=0;
    716     char buf[256] = {'0'};
    717 
    718     sid = printContainer(res, group, intvector_restype, NULL, id, status);
    719 
    720     for(i = 0; i < res->u.fIntVector.fCount; i += 1) {
    721         char c[256] = {0};
    722 
    723         itostr(c, i, 10, 0);
    724         ivd = getID(sid, c, ivd);
    725         len = itostr(buf, res->u.fIntVector.fArray[i], 10, 0);
    726 
    727         write_tabs(out);
    728         write_utf8_file(out, UnicodeString("<"));
    729         write_utf8_file(out, UnicodeString(trans_unit));
    730 
    731         printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
    732         printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
    733 
    734         write_utf8_file(out, UnicodeString(">\n"));
    735 
    736         tabCount += 1;
    737         write_tabs(out);
    738         write_utf8_file(out, UnicodeString(source));
    739 
    740         write_utf8_file(out, UnicodeString(buf, len));
    741 
    742         write_utf8_file(out, UnicodeString(close_source));
    743         tabCount -= 1;
    744         write_tabs(out);
    745         write_utf8_file(out, UnicodeString(close_trans_unit));
    746 
    747         uprv_free(ivd);
    748         ivd = NULL;
    749     }
    750 
    751     tabCount -= 1;
    752     write_tabs(out);
    753 
    754     write_utf8_file(out, UnicodeString(close_group));
    755     uprv_free(sid);
    756     sid = NULL;
    757 }
    758 
    759 static void
    760 int_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    761     char* sid = NULL;
    762     char buf[256] = {0};
    763     uint32_t len = 0;
    764 
    765     sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
    766 
    767     write_tabs(out);
    768 
    769     write_utf8_file(out, UnicodeString(source));
    770 
    771     len = itostr(buf, res->u.fIntValue.fValue, 10, 0);
    772     write_utf8_file(out, UnicodeString(buf, len));
    773 
    774     write_utf8_file(out, UnicodeString(close_source));
    775 
    776     printNoteElements(&res->fComment, status);
    777 
    778     tabCount -= 1;
    779     write_tabs(out);
    780 
    781     write_utf8_file(out, UnicodeString(close_trans_unit));
    782 
    783     uprv_free(sid);
    784     sid = NULL;
    785 }
    786 
    787 static void
    788 bin_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    789     const char* m_type = application_mimetype;
    790     char* sid = NULL;
    791     uint32_t crc = 0xFFFFFFFF;
    792 
    793     char fileName[1024] ={0};
    794     int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
    795     char* fn =  (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
    796                                                     (res->u.fBinaryValue.fFileName !=NULL ?
    797                                                     uprv_strlen(res->u.fBinaryValue.fFileName) :0)));
    798     const char* ext = NULL;
    799 
    800     char* f = NULL;
    801 
    802     fn[0]=0;
    803 
    804     if(res->u.fBinaryValue.fFileName != NULL){
    805         uprv_strcpy(fileName, res->u.fBinaryValue.fFileName);
    806         f = uprv_strrchr(fileName, '\\');
    807 
    808         if (f != NULL) {
    809             f++;
    810         } else {
    811             f = fileName;
    812         }
    813 
    814         ext = uprv_strrchr(fileName, '.');
    815 
    816         if (ext == NULL) {
    817             fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
    818             exit(U_ILLEGAL_ARGUMENT_ERROR);
    819         }
    820 
    821         if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
    822             m_type = "image";
    823         } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
    824             m_type = "audio";
    825         } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
    826             m_type = "video";
    827         } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
    828             m_type = "text";
    829         }
    830 
    831         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
    832 
    833         write_tabs(out);
    834 
    835         write_utf8_file(out, UnicodeString(bin_source));
    836 
    837         tabCount+= 1;
    838         write_tabs(out);
    839 
    840         write_utf8_file(out, UnicodeString(external_file));
    841         printAttribute("href", f, (int32_t)uprv_strlen(f));
    842         write_utf8_file(out, UnicodeString("/>\n"));
    843         tabCount -= 1;
    844         write_tabs(out);
    845 
    846         write_utf8_file(out, UnicodeString(close_bin_source));
    847 
    848         printNoteElements(&res->fComment, status);
    849         tabCount -= 1;
    850         write_tabs(out);
    851         write_utf8_file(out, UnicodeString(close_bin_unit));
    852     } else {
    853         char temp[256] = {0};
    854         uint32_t i = 0;
    855         int32_t len=0;
    856 
    857         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
    858 
    859         write_tabs(out);
    860         write_utf8_file(out, UnicodeString(bin_source));
    861 
    862         tabCount += 1;
    863         write_tabs(out);
    864 
    865         write_utf8_file(out, UnicodeString(internal_file));
    866         printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
    867 
    868         while(i <res->u.fBinaryValue.fLength){
    869             len = itostr(temp, res->u.fBinaryValue.fData[i], 16, 2);
    870             crc = computeCRC(temp, len, crc);
    871             i++;
    872         }
    873 
    874         len = itostr(temp, crc, 10, 0);
    875         printAttribute("crc", temp, len);
    876 
    877         write_utf8_file(out, UnicodeString(">"));
    878 
    879         i = 0;
    880         while(i <res->u.fBinaryValue.fLength){
    881             len = itostr(temp, res->u.fBinaryValue.fData[i], 16, 2);
    882             write_utf8_file(out, UnicodeString(temp));
    883             i += 1;
    884         }
    885 
    886         write_utf8_file(out, UnicodeString(close_internal_file));
    887 
    888         tabCount -= 2;
    889         write_tabs(out);
    890 
    891         write_utf8_file(out, UnicodeString(close_bin_source));
    892         printNoteElements(&res->fComment, status);
    893 
    894         tabCount -= 1;
    895         write_tabs(out);
    896         write_utf8_file(out, UnicodeString(close_bin_unit));
    897 
    898         uprv_free(sid);
    899         sid = NULL;
    900     }
    901 
    902     uprv_free(fn);
    903 }
    904 
    905 
    906 
    907 static void
    908 table_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
    909 
    910     uint32_t  i         = 0;
    911 
    912     struct SResource *current = NULL;
    913     struct SResource *save = NULL;
    914     char* sid = NULL;
    915 
    916     if (U_FAILURE(*status)) {
    917         return ;
    918     }
    919 
    920     sid = printContainer(res, group, table_restype, NULL, id, status);
    921 
    922     if(isTopLevel) {
    923         sid[0] = '\0';
    924     }
    925 
    926     save = current = res->u.fTable.fFirst;
    927     i = 0;
    928 
    929     while (current != NULL) {
    930         res_write_xml(current, sid, language, FALSE, status);
    931 
    932         if(U_FAILURE(*status)){
    933             return;
    934         }
    935 
    936         i += 1;
    937         current = current->fNext;
    938     }
    939 
    940     tabCount -= 1;
    941     write_tabs(out);
    942 
    943     write_utf8_file(out, UnicodeString(close_group));
    944 
    945     uprv_free(sid);
    946     sid = NULL;
    947 }
    948 
    949 void
    950 res_write_xml(struct SResource *res, const char* id,  const char* language, UBool isTopLevel, UErrorCode *status) {
    951 
    952     if (U_FAILURE(*status)) {
    953         return ;
    954     }
    955 
    956     if (res != NULL) {
    957         switch (res->fType) {
    958         case URES_STRING:
    959              string_write_xml    (res, id, language, status);
    960              return;
    961 
    962         case URES_ALIAS:
    963              alias_write_xml     (res, id, language, status);
    964              return;
    965 
    966         case URES_INT_VECTOR:
    967              intvector_write_xml (res, id, language, status);
    968              return;
    969 
    970         case URES_BINARY:
    971              bin_write_xml       (res, id, language, status);
    972              return;
    973 
    974         case URES_INT:
    975              int_write_xml       (res, id, language, status);
    976              return;
    977 
    978         case URES_ARRAY:
    979              array_write_xml     (res, id, language, status);
    980              return;
    981 
    982         case URES_TABLE:
    983              table_write_xml     (res, id, language, isTopLevel, status);
    984              return;
    985 
    986         default:
    987             break;
    988         }
    989     }
    990 
    991     *status = U_INTERNAL_PROGRAM_ERROR;
    992 }
    993 
    994 void
    995 bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
    996                   char *writtenFilename, int writtenFilenameLen,
    997                   const char* language, const char* outFileName, UErrorCode *status) {
    998 
    999     char* xmlfileName = NULL;
   1000     char* outputFileName = NULL;
   1001     char* originalFileName = NULL;
   1002     const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
   1003     const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
   1004     const char* file2 = "original = \"";
   1005     const char* file4 = "\" date = \"";
   1006     const char* fileEnd = "</file>\n";
   1007     const char* headerStart = "<header>\n";
   1008     const char* headerEnd = "</header>\n";
   1009     const char* bodyStart = "<body>\n";
   1010     const char* bodyEnd = "</body>\n";
   1011 
   1012     const char *tool_start = "<tool";
   1013     const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
   1014     const char *tool_name = "genrb";
   1015 
   1016     char* temp = NULL;
   1017     char* lang = NULL;
   1018     const char* pos = NULL;
   1019     int32_t first, index;
   1020     time_t currTime;
   1021     char timeBuf[128];
   1022 
   1023     outDir = outputDir;
   1024 
   1025     srBundle = bundle;
   1026 
   1027     pos = uprv_strrchr(filename, '\\');
   1028     if(pos != NULL) {
   1029         first = (int32_t)(pos - filename + 1);
   1030     } else {
   1031         first = 0;
   1032     }
   1033     index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
   1034     originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
   1035     uprv_memset(originalFileName, 0, sizeof(char)*index+1);
   1036     uprv_strncpy(originalFileName, filename + first, index);
   1037 
   1038     if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
   1039         fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
   1040     }
   1041 
   1042     temp = originalFileName;
   1043     originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
   1044     uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
   1045     uprv_strcat(originalFileName, temp);
   1046     uprv_strcat(originalFileName, textExt);
   1047     uprv_free(temp);
   1048     temp = NULL;
   1049 
   1050 
   1051     if (language == NULL) {
   1052 /*        lang = parseFilename(filename, lang);
   1053         if (lang == NULL) {*/
   1054             /* now check if locale name is valid or not
   1055              * this is to cater for situation where
   1056              * pegasusServer.txt contains
   1057              *
   1058              * en{
   1059              *      ..
   1060              * }
   1061              */
   1062              lang = parseFilename(srBundle->fLocale, lang);
   1063              /*
   1064               * Neither  the file name nor the table name inside the
   1065               * txt file contain a valid country and language codes
   1066               * throw an error.
   1067               * pegasusServer.txt contains
   1068               *
   1069               * testelements{
   1070               *     ....
   1071               * }
   1072               */
   1073              if(lang==NULL){
   1074                  fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
   1075                  exit(U_ILLEGAL_ARGUMENT_ERROR);
   1076              }
   1077        /* }*/
   1078     } else {
   1079         lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
   1080         uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
   1081         uprv_strcpy(lang, language);
   1082     }
   1083 
   1084     if(outFileName) {
   1085         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
   1086         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
   1087         uprv_strcpy(outputFileName,outFileName);
   1088     } else {
   1089         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
   1090         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
   1091         uprv_strcpy(outputFileName,srBundle->fLocale);
   1092     }
   1093 
   1094     if(outputDir) {
   1095         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
   1096         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
   1097     } else {
   1098         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
   1099         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
   1100     }
   1101 
   1102     if(outputDir){
   1103         uprv_strcpy(xmlfileName, outputDir);
   1104         if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
   1105             uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
   1106         }
   1107     }
   1108     uprv_strcat(xmlfileName,outputFileName);
   1109     uprv_strcat(xmlfileName,xliffExt);
   1110 
   1111     if (writtenFilename) {
   1112         uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
   1113     }
   1114 
   1115     if (U_FAILURE(*status)) {
   1116         goto cleanup_bundle_write_xml;
   1117     }
   1118 
   1119     out= T_FileStream_open(xmlfileName,"w");
   1120 
   1121     if(out==NULL){
   1122         *status = U_FILE_ACCESS_ERROR;
   1123         goto cleanup_bundle_write_xml;
   1124     }
   1125     write_utf8_file(out, xmlHeader);
   1126 
   1127     if(outputEnc && *outputEnc!='\0'){
   1128         /* store the output encoding */
   1129         enc = outputEnc;
   1130         conv=ucnv_open(enc,status);
   1131         if(U_FAILURE(*status)){
   1132             goto cleanup_bundle_write_xml;
   1133         }
   1134     }
   1135     write_utf8_file(out, bundleStart);
   1136     write_tabs(out);
   1137     write_utf8_file(out, fileStart);
   1138     /* check if lang and language are the same */
   1139     if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
   1140         fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
   1141     }
   1142     write_utf8_file(out, UnicodeString(lang));
   1143     write_utf8_file(out, UnicodeString(file1));
   1144     write_utf8_file(out, UnicodeString(file2));
   1145     write_utf8_file(out, UnicodeString(originalFileName));
   1146     write_utf8_file(out, UnicodeString(file4));
   1147 
   1148     time(&currTime);
   1149     strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
   1150     write_utf8_file(out, UnicodeString(timeBuf));
   1151     write_utf8_file(out, UnicodeString("\">\n"));
   1152 
   1153     tabCount += 1;
   1154     write_tabs(out);
   1155     write_utf8_file(out, headerStart);
   1156 
   1157     tabCount += 1;
   1158     write_tabs(out);
   1159 
   1160     write_utf8_file(out, tool_start);
   1161     printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
   1162     printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
   1163     write_utf8_file(out, UnicodeString("/>\n"));
   1164 
   1165     tabCount -= 1;
   1166     write_tabs(out);
   1167 
   1168     write_utf8_file(out, UnicodeString(headerEnd));
   1169 
   1170     write_tabs(out);
   1171     tabCount += 1;
   1172 
   1173     write_utf8_file(out, UnicodeString(bodyStart));
   1174 
   1175 
   1176     res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
   1177 
   1178     tabCount -= 1;
   1179     write_tabs(out);
   1180 
   1181     write_utf8_file(out, UnicodeString(bodyEnd));
   1182     tabCount--;
   1183     write_tabs(out);
   1184     write_utf8_file(out, UnicodeString(fileEnd));
   1185     tabCount--;
   1186     write_tabs(out);
   1187     write_utf8_file(out, UnicodeString(bundleEnd));
   1188     T_FileStream_close(out);
   1189 
   1190     ucnv_close(conv);
   1191 
   1192 cleanup_bundle_write_xml:
   1193     uprv_free(originalFileName);
   1194     uprv_free(lang);
   1195     if(xmlfileName != NULL) {
   1196         uprv_free(xmlfileName);
   1197     }
   1198     if(outputFileName != NULL){
   1199         uprv_free(outputFileName);
   1200     }
   1201 }
   1202