Home | History | Annotate | Download | only in genrb
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2002-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *
     11 * File wrtxml.cpp
     12 *
     13 * Modification History:
     14 *
     15 *   Date        Name        Description
     16 *   10/01/02    Ram         Creation.
     17 *   02/07/08    Spieth      Correct XLIFF generation on EBCDIC platform
     18 *
     19 *******************************************************************************
     20 */
     21 
     22 // Safer use of UnicodeString.
     23 #ifndef UNISTR_FROM_CHAR_EXPLICIT
     24 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
     25 #endif
     26 
     27 // Less important, but still a good idea.
     28 #ifndef UNISTR_FROM_STRING_EXPLICIT
     29 #   define UNISTR_FROM_STRING_EXPLICIT explicit
     30 #endif
     31 
     32 #include "reslist.h"
     33 #include "unewdata.h"
     34 #include "unicode/ures.h"
     35 #include "errmsg.h"
     36 #include "filestrm.h"
     37 #include "cstring.h"
     38 #include "unicode/ucnv.h"
     39 #include "genrb.h"
     40 #include "rle.h"
     41 #include "uhash.h"
     42 #include "uresimp.h"
     43 #include "unicode/ustring.h"
     44 #include "unicode/uchar.h"
     45 #include "ustr.h"
     46 #include "prscmnts.h"
     47 #include "unicode/unistr.h"
     48 #include "unicode/utf8.h"
     49 #include "unicode/utf16.h"
     50 #include <time.h>
     51 
     52 U_NAMESPACE_USE
     53 
/* Current nesting depth of the output; write_tabs() emits tabCount + 1 indent units. */
static int tabCount = 0;

/* Stream that the writer functions in this file send their output to. */
static FileStream* out=NULL;
/* Bundle being written; handed to getKeyString() when a resource key is looked up. */
static struct SRBRoot* srBundle ;
/* Output directory; may be NULL (bin_write_xml treats NULL as length 0). */
static const char* outDir = NULL;
/* NOTE(review): enc and conv are not referenced in the visible part of this file;
 * presumably the output encoding name and its converter -- confirm before relying on them. */
static const char* enc ="";
static UConverter* conv = NULL;

/* NOTE(review): the following non-static globals are not referenced in the visible
 * part of this file; their users are presumably elsewhere -- confirm. */
const char* const* ISOLanguages;
const char* const* ISOCountries;
const char* textExt = ".txt";
const char* xliffExt = ".xlf";
     66 
     67 static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
     68 {
     69     UErrorCode status = U_ZERO_ERROR;
     70     int32_t len = 0;
     71 
     72     // preflight to get the destination buffer size
     73     u_strToUTF8(NULL,
     74                 0,
     75                 &len,
     76                 toUCharPtr(outString.getBuffer()),
     77                 outString.length(),
     78                 &status);
     79 
     80     // allocate the buffer
     81     char* dest = (char*)uprv_malloc(len);
     82     status = U_ZERO_ERROR;
     83 
     84     // convert the data
     85     u_strToUTF8(dest,
     86                 len,
     87                 &len,
     88                 toUCharPtr(outString.getBuffer()),
     89                 outString.length(),
     90                 &status);
     91 
     92     // write data to out file
     93     int32_t ret = T_FileStream_write(fileStream, dest, len);
     94     uprv_free(dest);
     95     return (ret);
     96 }
     97 
     98 /*write indentation for formatting*/
     99 static void write_tabs(FileStream* os){
    100     int i=0;
    101     for(;i<=tabCount;i++){
    102         write_utf8_file(os,UnicodeString("    "));
    103     }
    104 }
    105 
    106 /*get ID for each element. ID is globally unique.*/
    107 static char* getID(const char* id, const char* curKey, char* result) {
    108     if(curKey == NULL) {
    109         result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
    110         uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
    111         uprv_strcpy(result, id);
    112     } else {
    113         result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
    114         uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
    115         if(id[0]!='\0'){
    116             uprv_strcpy(result, id);
    117             uprv_strcat(result, "_");
    118         }
    119         uprv_strcat(result, curKey);
    120     }
    121     return result;
    122 }
    123 
    124 /*compute CRC for binary code*/
    125 /* The code is from  http://www.theorem.com/java/CRC32.java
    126  * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
    127  * <P> This check is used in numerous systems to verify the integrity
    128  * of information.  It's also used as a hashing function.  Unlike a regular
    129  * checksum, it's sensitive to the order of the characters.
    130  * It produces a 32 bit
    131  *
    132  * @author Michael Lecuyer (mjl (at) theorem.com)
    133  * @version 1.1 August 11, 1998
    134  */
    135 
    136 /* ICU is not endian portable, because ICU data generated on big endian machines can be
    137  * ported to big endian machines but not to little endian machines and vice versa. The
    138  * conversion is not portable across platforms with different endianess.
    139  */
    140 
    141 uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
    142     int32_t crc;
    143     uint32_t temp1;
    144     uint32_t temp2;
    145 
    146     int32_t crc_ta[256];
    147     int i = 0;
    148     int j = 0;
    149     uint32_t crc2 = 0;
    150 
    151 #define CRC32_POLYNOMIAL 0xEDB88320
    152 
    153     /*build crc table*/
    154     for (i = 0; i <= 255; i++) {
    155         crc2 = i;
    156         for (j = 8; j > 0; j--) {
    157             if ((crc2 & 1) == 1) {
    158                 crc2 = (crc2 >> 1) ^ CRC32_POLYNOMIAL;
    159             } else {
    160                 crc2 >>= 1;
    161             }
    162         }
    163         crc_ta[i] = crc2;
    164     }
    165 
    166     crc = lastcrc;
    167     while(len--!=0) {
    168         temp1 = (uint32_t)crc>>8;
    169         temp2 = crc_ta[(crc^*ptr) & 0xFF];
    170         crc = temp1^temp2;
    171         ptr++;
    172     }
    173     return(crc);
    174 }
    175 
    176 static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    177     int32_t i = 0;
    178     for(i=0;i<srcLen;i++){
    179         if(src[i]==s){
    180             src[i]=r;
    181         }
    182     }
    183 }
    184 /* Parse the filename, and get its language information.
    185  * If it fails to get the language information from the filename,
    186  * use "en" as the default value for language
    187  */
    188 static char* parseFilename(const char* id, char* /*lang*/) {
    189     int idLen = (int) uprv_strlen(id);
    190     char* localeID = (char*) uprv_malloc(idLen);
    191     int pos = 0;
    192     int canonCapacity = 0;
    193     char* canon = NULL;
    194     int canonLen = 0;
    195     /*int i;*/
    196     UErrorCode status = U_ZERO_ERROR;
    197     const char *ext = uprv_strchr(id, '.');
    198 
    199     if(ext != NULL){
    200         pos = (int) (ext - id);
    201     } else {
    202         pos = idLen;
    203     }
    204     uprv_memcpy(localeID, id, pos);
    205     localeID[pos]=0; /* NUL terminate the string */
    206 
    207     canonCapacity =pos*3;
    208     canon = (char*) uprv_malloc(canonCapacity);
    209     canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
    210 
    211     if(U_FAILURE(status)){
    212         fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
    213         exit(status);
    214     }
    215     strnrepchr(canon, canonLen, '_', '-');
    216     return canon;
    217 }
    218 
/* XML declaration text; the output is declared as UTF-8. */
static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
/* Opening <xliff> root element. The XLIFF 1.2 variant is compiled out; the 1.1 variant is the one in use. */
#if 0
static const char* bundleStart = "<xliff version = \"1.2\" "
                                        "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
                                        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                        "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
#else
static const char* bundleStart = "<xliff version = \"1.1\" "
                                        "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
                                        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                        "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
#endif
/* Closing tag matching bundleStart. */
static const char* bundleEnd   = "</xliff>\n";

/* Forward declaration: array_write_xml() calls this for each element before its definition appears. */
void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
    234 
    235 static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
    236                               const UChar* src, int32_t srcLen, UErrorCode* status){
    237     int32_t srcIndex=0;
    238     char* dest=NULL;
    239     char* temp=NULL;
    240     int32_t destLen=0;
    241     UChar32 c = 0;
    242 
    243     if(status==NULL || U_FAILURE(*status) || pDest==NULL  || srcLen==0 || src == NULL){
    244         return NULL;
    245     }
    246     dest =*pDest;
    247     if(dest==NULL || destCap <=0){
    248         destCap = srcLen * 8;
    249         dest = (char*) uprv_malloc(sizeof(char) * destCap);
    250         if(dest==NULL){
    251             *status=U_MEMORY_ALLOCATION_ERROR;
    252             return NULL;
    253         }
    254     }
    255 
    256     dest[0]=0;
    257 
    258     while(srcIndex<srcLen){
    259         U16_NEXT(src, srcIndex, srcLen, c);
    260 
    261         if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
    262             *status = U_ILLEGAL_CHAR_FOUND;
    263             fprintf(stderr, "Illegal Surrogate! \n");
    264             uprv_free(dest);
    265             return NULL;
    266         }
    267 
    268         if((destLen+U8_LENGTH(c)) < destCap){
    269 
    270             /* ASCII Range */
    271             if(c <=0x007F){
    272                 switch(c) {
    273                 case '\x26':
    274                     uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
    275                     destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
    276                     break;
    277                 case '\x3c':
    278                     uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
    279                     destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
    280                     break;
    281                 case '\x3e':
    282                     uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
    283                     destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
    284                     break;
    285                 case '\x22':
    286                     uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
    287                     destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
    288                     break;
    289                 case '\x27':
    290                     uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
    291                     destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
    292                     break;
    293 
    294                  /* Disallow C0 controls except TAB, CR, LF*/
    295                 case 0x00:
    296                 case 0x01:
    297                 case 0x02:
    298                 case 0x03:
    299                 case 0x04:
    300                 case 0x05:
    301                 case 0x06:
    302                 case 0x07:
    303                 case 0x08:
    304                 /*case 0x09:*/
    305                 /*case 0x0A: */
    306                 case 0x0B:
    307                 case 0x0C:
    308                 /*case 0x0D:*/
    309                 case 0x0E:
    310                 case 0x0F:
    311                 case 0x10:
    312                 case 0x11:
    313                 case 0x12:
    314                 case 0x13:
    315                 case 0x14:
    316                 case 0x15:
    317                 case 0x16:
    318                 case 0x17:
    319                 case 0x18:
    320                 case 0x19:
    321                 case 0x1A:
    322                 case 0x1B:
    323                 case 0x1C:
    324                 case 0x1D:
    325                 case 0x1E:
    326                 case 0x1F:
    327                     *status = U_ILLEGAL_CHAR_FOUND;
    328                     fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
    329                     uprv_free(dest);
    330                     return NULL;
    331                 default:
    332                     dest[destLen++]=(char)c;
    333                 }
    334             }else{
    335                 UBool isError = FALSE;
    336                 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
    337                 if(isError){
    338                     *status = U_ILLEGAL_CHAR_FOUND;
    339                     fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
    340                     uprv_free(dest);
    341                     return NULL;
    342                 }
    343             }
    344         }else{
    345             destCap += destLen;
    346 
    347             temp = (char*) uprv_malloc(sizeof(char)*destCap);
    348             if(temp==NULL){
    349                 *status=U_MEMORY_ALLOCATION_ERROR;
    350                 uprv_free(dest);
    351                 return NULL;
    352             }
    353             uprv_memmove(temp,dest,destLen);
    354             destLen=0;
    355             uprv_free(dest);
    356             dest=temp;
    357             temp=NULL;
    358         }
    359 
    360     }
    361     *destLength = destLen;
    362     return dest;
    363 }
    364 
    365 #define ASTERISK 0x002A
    366 #define SPACE    0x0020
    367 #define CR       0x000A
    368 #define LF       0x000D
    369 #define AT_SIGN  0x0040
    370 
    371 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0
    372 static void
    373 trim(char **src, int32_t *len){
    374 
    375     char *s = NULL;
    376     int32_t i = 0;
    377     if(src == NULL || *src == NULL){
    378         return;
    379     }
    380     s = *src;
    381     /* trim from the end */
    382     for( i=(*len-1); i>= 0; i--){
    383         switch(s[i]){
    384         case ASTERISK:
    385         case SPACE:
    386         case CR:
    387         case LF:
    388             s[i] = 0;
    389             continue;
    390         default:
    391             break;
    392         }
    393         break;
    394 
    395     }
    396     *len = i+1;
    397 }
    398 
    399 static void
    400 print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd,  UErrorCode *status){
    401     int32_t bufCapacity   = srcLen*4;
    402     char *buf       = NULL;
    403     int32_t bufLen = 0;
    404 
    405     if(U_FAILURE(*status)){
    406         return;
    407     }
    408 
    409     buf = (char*) (uprv_malloc(bufCapacity));
    410     if(buf==0){
    411         fprintf(stderr, "Could not allocate memory!!");
    412         exit(U_MEMORY_ALLOCATION_ERROR);
    413     }
    414     buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
    415     if(U_SUCCESS(*status)){
    416         trim(&buf,&bufLen);
    417         write_utf8_file(out,UnicodeString(tagStart));
    418         write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
    419         write_utf8_file(out,UnicodeString(tagEnd));
    420         write_utf8_file(out,UnicodeString("\n"));
    421 
    422     }
    423 }
    424 #endif
    425 
    426 static void
    427 printNoteElements(const UString *src, UErrorCode *status){
    428 
    429 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
    430 
    431     int32_t capacity = 0;
    432     UChar* note = NULL;
    433     int32_t noteLen = 0;
    434     int32_t count = 0,i;
    435 
    436     if(src == NULL){
    437         return;
    438     }
    439 
    440     capacity = src->fLength;
    441     note  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
    442 
    443     count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
    444     if(U_FAILURE(*status)){
    445         uprv_free(note);
    446         return;
    447     }
    448     for(i=0; i < count; i++){
    449         noteLen =  getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
    450         if(U_FAILURE(*status)){
    451             uprv_free(note);
    452             return;
    453         }
    454         if(noteLen > 0){
    455             write_tabs(out);
    456             print(note, noteLen,"<note>", "</note>", status);
    457         }
    458     }
    459     uprv_free(note);
    460 #else
    461 
    462     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
    463 
    464 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    465 
    466 }
    467 
    468 static void printAttribute(const char *name, const char *value, int32_t /*len*/)
    469 {
    470     write_utf8_file(out, UnicodeString(" "));
    471     write_utf8_file(out, UnicodeString(name));
    472     write_utf8_file(out, UnicodeString(" = \""));
    473     write_utf8_file(out, UnicodeString(value));
    474     write_utf8_file(out, UnicodeString("\""));
    475 }
    476 
    477 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
    478 static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
    479 {
    480     write_utf8_file(out, UnicodeString(" "));
    481     write_utf8_file(out, UnicodeString(name));
    482     write_utf8_file(out, UnicodeString(" = \""));
    483     write_utf8_file(out, value);
    484     write_utf8_file(out, UnicodeString("\""));
    485 }
    486 #endif
    487 
    488 static void
    489 printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
    490 
    491 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
    492 
    493     if(status==NULL || U_FAILURE(*status)){
    494         return;
    495     }
    496 
    497     int32_t capacity = src->fLength + 1;
    498     char* buf = NULL;
    499     int32_t bufLen = 0;
    500     UChar* desc  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
    501     UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
    502 
    503     int32_t descLen = 0, transLen=0;
    504     if(desc==NULL || trans==NULL){
    505         *status = U_MEMORY_ALLOCATION_ERROR;
    506         uprv_free(desc);
    507         uprv_free(trans);
    508         return;
    509     }
    510     // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc.
    511     src->fLength = removeCmtText(src->fChars, src->fLength, status);
    512     descLen  = getDescription(src->fChars,src->fLength, &desc, capacity, status);
    513     transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
    514 
    515     /* first print translate attribute */
    516     if(transLen > 0){
    517         if(printTranslate){
    518             /* print translate attribute */
    519             buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
    520             if(U_SUCCESS(*status)){
    521                 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
    522                 write_utf8_file(out,UnicodeString(">\n"));
    523             }
    524         }else if(getShowWarning()){
    525             fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
    526             /* no translate attribute .. just close the tag */
    527             write_utf8_file(out,UnicodeString(">\n"));
    528         }
    529     }else{
    530         /* no translate attribute .. just close the tag */
    531         write_utf8_file(out,UnicodeString(">\n"));
    532     }
    533 
    534     if(descLen > 0){
    535         write_tabs(out);
    536         print(desc, descLen, "<!--", "-->", status);
    537     }
    538 
    539     uprv_free(desc);
    540     uprv_free(trans);
    541 #else
    542 
    543     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
    544 
    545 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
    546 
    547 }
    548 
    549 /*
    550  * Print out a containing element, like:
    551  * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
    552  * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
    553  */
    554 static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
    555 {
    556     const char *resname = NULL;
    557     char *sid = NULL;
    558 
    559     write_tabs(out);
    560 
    561     resname = res->getKeyString(srBundle);
    562     if (resname != NULL && *resname != 0) {
    563         sid = getID(id, resname, sid);
    564     } else {
    565         sid = getID(id, NULL, sid);
    566     }
    567 
    568     write_utf8_file(out, UnicodeString("<"));
    569     write_utf8_file(out, UnicodeString(container));
    570     printAttribute("id", sid, (int32_t) uprv_strlen(sid));
    571 
    572     if (resname != NULL) {
    573         printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
    574     }
    575 
    576     if (mimetype != NULL) {
    577         printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
    578     }
    579 
    580     if (restype != NULL) {
    581         printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
    582     }
    583 
    584     tabCount += 1;
    585     if (res->fComment.fLength > 0) {
    586         /* printComments will print the closing ">\n" */
    587         printComments(&res->fComment, resname, TRUE, status);
    588     } else {
    589         write_utf8_file(out, UnicodeString(">\n"));
    590     }
    591 
    592     return sid;
    593 }
    594 
    595 /* Writing Functions */
    596 
/* Element names are passed to printContainer(), which adds the surrounding
 * "<" and attributes; the close_* strings are complete closing tags. */
static const char *trans_unit = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source = "<source>";
static const char *close_source = "</source>\n";
static const char *group = "group";
static const char *close_group = "</group>\n";

/* Tags used by bin_write_xml() for binary resources. */
static const char *bin_unit = "bin-unit";
static const char *close_bin_unit = "</bin-unit>\n";
static const char *bin_source = "<bin-source>\n";
static const char *close_bin_source = "</bin-source>\n";
/* external_file and internal_file have no closing ">" because attributes are appended to them. */
static const char *external_file = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

/* Default mime-type for binary resources whose file extension is not recognized. */
static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per resource kind. */
static const char *alias_restype     = "x-icu-alias";
static const char *array_restype     = "x-icu-array";
static const char *binary_restype    = "x-icu-binary";
static const char *integer_restype   = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype     = "x-icu-table";
    621 
    622 static void
    623 string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    624 
    625     char *sid = NULL;
    626     char* buf = NULL;
    627     int32_t bufLen = 0;
    628 
    629     if(status==NULL || U_FAILURE(*status)){
    630         return;
    631     }
    632 
    633     sid = printContainer(res, trans_unit, NULL, NULL, id, status);
    634 
    635     write_tabs(out);
    636 
    637     write_utf8_file(out, UnicodeString(source));
    638 
    639     buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
    640 
    641     if (U_FAILURE(*status)) {
    642         return;
    643     }
    644 
    645     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
    646     write_utf8_file(out, UnicodeString(close_source));
    647 
    648     printNoteElements(&res->fComment, status);
    649 
    650     tabCount -= 1;
    651     write_tabs(out);
    652 
    653     write_utf8_file(out, UnicodeString(close_trans_unit));
    654 
    655     uprv_free(buf);
    656     uprv_free(sid);
    657 }
    658 
    659 static void
    660 alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    661     char *sid = NULL;
    662     char* buf = NULL;
    663     int32_t bufLen=0;
    664 
    665     sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
    666 
    667     write_tabs(out);
    668 
    669     write_utf8_file(out, UnicodeString(source));
    670 
    671     buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
    672 
    673     if(U_FAILURE(*status)){
    674         return;
    675     }
    676     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
    677     write_utf8_file(out, UnicodeString(close_source));
    678 
    679     printNoteElements(&res->fComment, status);
    680 
    681     tabCount -= 1;
    682     write_tabs(out);
    683 
    684     write_utf8_file(out, UnicodeString(close_trans_unit));
    685 
    686     uprv_free(buf);
    687     uprv_free(sid);
    688 }
    689 
    690 static void
    691 array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) {
    692     char* sid = NULL;
    693     int index = 0;
    694 
    695     struct SResource *current = NULL;
    696 
    697     sid = printContainer(res, group, array_restype, NULL, id, status);
    698 
    699     current = res->fFirst;
    700 
    701     while (current != NULL) {
    702         char c[256] = {0};
    703         char* subId = NULL;
    704 
    705         itostr(c, index, 10, 0);
    706         index += 1;
    707         subId = getID(sid, c, subId);
    708 
    709         res_write_xml(current, subId, language, FALSE, status);
    710         uprv_free(subId);
    711         subId = NULL;
    712 
    713         if(U_FAILURE(*status)){
    714             return;
    715         }
    716 
    717         current = current->fNext;
    718     }
    719 
    720     tabCount -= 1;
    721     write_tabs(out);
    722     write_utf8_file(out, UnicodeString(close_group));
    723 
    724     uprv_free(sid);
    725 }
    726 
    727 static void
    728 intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    729     char* sid = NULL;
    730     char* ivd = NULL;
    731     uint32_t i=0;
    732     uint32_t len=0;
    733     char buf[256] = {'0'};
    734 
    735     sid = printContainer(res, group, intvector_restype, NULL, id, status);
    736 
    737     for(i = 0; i < res->fCount; i += 1) {
    738         char c[256] = {0};
    739 
    740         itostr(c, i, 10, 0);
    741         ivd = getID(sid, c, ivd);
    742         len = itostr(buf, res->fArray[i], 10, 0);
    743 
    744         write_tabs(out);
    745         write_utf8_file(out, UnicodeString("<"));
    746         write_utf8_file(out, UnicodeString(trans_unit));
    747 
    748         printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
    749         printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
    750 
    751         write_utf8_file(out, UnicodeString(">\n"));
    752 
    753         tabCount += 1;
    754         write_tabs(out);
    755         write_utf8_file(out, UnicodeString(source));
    756 
    757         write_utf8_file(out, UnicodeString(buf, len));
    758 
    759         write_utf8_file(out, UnicodeString(close_source));
    760         tabCount -= 1;
    761         write_tabs(out);
    762         write_utf8_file(out, UnicodeString(close_trans_unit));
    763 
    764         uprv_free(ivd);
    765         ivd = NULL;
    766     }
    767 
    768     tabCount -= 1;
    769     write_tabs(out);
    770 
    771     write_utf8_file(out, UnicodeString(close_group));
    772     uprv_free(sid);
    773     sid = NULL;
    774 }
    775 
    776 static void
    777 int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    778     char* sid = NULL;
    779     char buf[256] = {0};
    780     uint32_t len = 0;
    781 
    782     sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
    783 
    784     write_tabs(out);
    785 
    786     write_utf8_file(out, UnicodeString(source));
    787 
    788     len = itostr(buf, res->fValue, 10, 0);
    789     write_utf8_file(out, UnicodeString(buf, len));
    790 
    791     write_utf8_file(out, UnicodeString(close_source));
    792 
    793     printNoteElements(&res->fComment, status);
    794 
    795     tabCount -= 1;
    796     write_tabs(out);
    797 
    798     write_utf8_file(out, UnicodeString(close_trans_unit));
    799 
    800     uprv_free(sid);
    801     sid = NULL;
    802 }
    803 
    804 static void
    805 bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
    806     const char* m_type = application_mimetype;
    807     char* sid = NULL;
    808     uint32_t crc = 0xFFFFFFFF;
    809 
    810     char fileName[1024] ={0};
    811     int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
    812     char* fn =  (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
    813                                                     (res->fFileName !=NULL ?
    814                                                     uprv_strlen(res->fFileName) :0)));
    815     const char* ext = NULL;
    816 
    817     char* f = NULL;
    818 
    819     fn[0]=0;
    820 
    821     if(res->fFileName != NULL){
    822         uprv_strcpy(fileName, res->fFileName);
    823         f = uprv_strrchr(fileName, '\\');
    824 
    825         if (f != NULL) {
    826             f++;
    827         } else {
    828             f = fileName;
    829         }
    830 
    831         ext = uprv_strrchr(fileName, '.');
    832 
    833         if (ext == NULL) {
    834             fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
    835             exit(U_ILLEGAL_ARGUMENT_ERROR);
    836         }
    837 
    838         if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
    839             m_type = "image";
    840         } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
    841             m_type = "audio";
    842         } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
    843             m_type = "video";
    844         } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
    845             m_type = "text";
    846         }
    847 
    848         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
    849 
    850         write_tabs(out);
    851 
    852         write_utf8_file(out, UnicodeString(bin_source));
    853 
    854         tabCount+= 1;
    855         write_tabs(out);
    856 
    857         write_utf8_file(out, UnicodeString(external_file));
    858         printAttribute("href", f, (int32_t)uprv_strlen(f));
    859         write_utf8_file(out, UnicodeString("/>\n"));
    860         tabCount -= 1;
    861         write_tabs(out);
    862 
    863         write_utf8_file(out, UnicodeString(close_bin_source));
    864 
    865         printNoteElements(&res->fComment, status);
    866         tabCount -= 1;
    867         write_tabs(out);
    868         write_utf8_file(out, UnicodeString(close_bin_unit));
    869     } else {
    870         char temp[256] = {0};
    871         uint32_t i = 0;
    872         int32_t len=0;
    873 
    874         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
    875 
    876         write_tabs(out);
    877         write_utf8_file(out, UnicodeString(bin_source));
    878 
    879         tabCount += 1;
    880         write_tabs(out);
    881 
    882         write_utf8_file(out, UnicodeString(internal_file));
    883         printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
    884 
    885         while(i <res->fLength){
    886             len = itostr(temp, res->fData[i], 16, 2);
    887             crc = computeCRC(temp, len, crc);
    888             i++;
    889         }
    890 
    891         len = itostr(temp, crc, 10, 0);
    892         printAttribute("crc", temp, len);
    893 
    894         write_utf8_file(out, UnicodeString(">"));
    895 
    896         i = 0;
    897         while(i <res->fLength){
    898             len = itostr(temp, res->fData[i], 16, 2);
    899             write_utf8_file(out, UnicodeString(temp));
    900             i += 1;
    901         }
    902 
    903         write_utf8_file(out, UnicodeString(close_internal_file));
    904 
    905         tabCount -= 2;
    906         write_tabs(out);
    907 
    908         write_utf8_file(out, UnicodeString(close_bin_source));
    909         printNoteElements(&res->fComment, status);
    910 
    911         tabCount -= 1;
    912         write_tabs(out);
    913         write_utf8_file(out, UnicodeString(close_bin_unit));
    914 
    915         uprv_free(sid);
    916         sid = NULL;
    917     }
    918 
    919     uprv_free(fn);
    920 }
    921 
    922 
    923 
    924 static void
    925 table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
    926 
    927     uint32_t  i         = 0;
    928 
    929     struct SResource *current = NULL;
    930     char* sid = NULL;
    931 
    932     if (U_FAILURE(*status)) {
    933         return ;
    934     }
    935 
    936     sid = printContainer(res, group, table_restype, NULL, id, status);
    937 
    938     if(isTopLevel) {
    939         sid[0] = '\0';
    940     }
    941 
    942     current = res->fFirst;
    943     i = 0;
    944 
    945     while (current != NULL) {
    946         res_write_xml(current, sid, language, FALSE, status);
    947 
    948         if(U_FAILURE(*status)){
    949             return;
    950         }
    951 
    952         i += 1;
    953         current = current->fNext;
    954     }
    955 
    956     tabCount -= 1;
    957     write_tabs(out);
    958 
    959     write_utf8_file(out, UnicodeString(close_group));
    960 
    961     uprv_free(sid);
    962     sid = NULL;
    963 }
    964 
    965 void
    966 res_write_xml(struct SResource *res, const char* id,  const char* language, UBool isTopLevel, UErrorCode *status) {
    967 
    968     if (U_FAILURE(*status)) {
    969         return ;
    970     }
    971 
    972     if (res != NULL) {
    973         switch (res->fType) {
    974         case URES_STRING:
    975              string_write_xml    (static_cast<StringResource *>(res), id, language, status);
    976              return;
    977 
    978         case URES_ALIAS:
    979              alias_write_xml     (static_cast<AliasResource *>(res), id, language, status);
    980              return;
    981 
    982         case URES_INT_VECTOR:
    983              intvector_write_xml (static_cast<IntVectorResource *>(res), id, language, status);
    984              return;
    985 
    986         case URES_BINARY:
    987              bin_write_xml       (static_cast<BinaryResource *>(res), id, language, status);
    988              return;
    989 
    990         case URES_INT:
    991              int_write_xml       (static_cast<IntResource *>(res), id, language, status);
    992              return;
    993 
    994         case URES_ARRAY:
    995              array_write_xml     (static_cast<ArrayResource *>(res), id, language, status);
    996              return;
    997 
    998         case URES_TABLE:
    999              table_write_xml     (static_cast<TableResource *>(res), id, language, isTopLevel, status);
   1000              return;
   1001 
   1002         default:
   1003             break;
   1004         }
   1005     }
   1006 
   1007     *status = U_INTERNAL_PROGRAM_ERROR;
   1008 }
   1009 
   1010 void
   1011 bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
   1012                   char *writtenFilename, int writtenFilenameLen,
   1013                   const char* language, const char* outFileName, UErrorCode *status) {
   1014 
   1015     char* xmlfileName = NULL;
   1016     char* outputFileName = NULL;
   1017     char* originalFileName = NULL;
   1018     const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
   1019     const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
   1020     const char* file2 = "original = \"";
   1021     const char* file4 = "\" date = \"";
   1022     const char* fileEnd = "</file>\n";
   1023     const char* headerStart = "<header>\n";
   1024     const char* headerEnd = "</header>\n";
   1025     const char* bodyStart = "<body>\n";
   1026     const char* bodyEnd = "</body>\n";
   1027 
   1028     const char *tool_start = "<tool";
   1029     const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
   1030     const char *tool_name = "genrb";
   1031 
   1032     char* temp = NULL;
   1033     char* lang = NULL;
   1034     const char* pos = NULL;
   1035     int32_t first, index;
   1036     time_t currTime;
   1037     char timeBuf[128];
   1038 
   1039     outDir = outputDir;
   1040 
   1041     srBundle = bundle;
   1042 
   1043     pos = uprv_strrchr(filename, '\\');
   1044     if(pos != NULL) {
   1045         first = (int32_t)(pos - filename + 1);
   1046     } else {
   1047         first = 0;
   1048     }
   1049     index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
   1050     originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
   1051     uprv_memset(originalFileName, 0, sizeof(char)*index+1);
   1052     uprv_strncpy(originalFileName, filename + first, index);
   1053 
   1054     if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
   1055         fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
   1056     }
   1057 
   1058     temp = originalFileName;
   1059     originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
   1060     uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
   1061     uprv_strcat(originalFileName, temp);
   1062     uprv_strcat(originalFileName, textExt);
   1063     uprv_free(temp);
   1064     temp = NULL;
   1065 
   1066 
   1067     if (language == NULL) {
   1068 /*        lang = parseFilename(filename, lang);
   1069         if (lang == NULL) {*/
   1070             /* now check if locale name is valid or not
   1071              * this is to cater for situation where
   1072              * pegasusServer.txt contains
   1073              *
   1074              * en{
   1075              *      ..
   1076              * }
   1077              */
   1078              lang = parseFilename(srBundle->fLocale, lang);
   1079              /*
   1080               * Neither  the file name nor the table name inside the
   1081               * txt file contain a valid country and language codes
   1082               * throw an error.
   1083               * pegasusServer.txt contains
   1084               *
   1085               * testelements{
   1086               *     ....
   1087               * }
   1088               */
   1089              if(lang==NULL){
   1090                  fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
   1091                  exit(U_ILLEGAL_ARGUMENT_ERROR);
   1092              }
   1093        /* }*/
   1094     } else {
   1095         lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
   1096         uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
   1097         uprv_strcpy(lang, language);
   1098     }
   1099 
   1100     if(outFileName) {
   1101         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
   1102         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
   1103         uprv_strcpy(outputFileName,outFileName);
   1104     } else {
   1105         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
   1106         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
   1107         uprv_strcpy(outputFileName,srBundle->fLocale);
   1108     }
   1109 
   1110     if(outputDir) {
   1111         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
   1112         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
   1113     } else {
   1114         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
   1115         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
   1116     }
   1117 
   1118     if(outputDir){
   1119         uprv_strcpy(xmlfileName, outputDir);
   1120         if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
   1121             uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
   1122         }
   1123     }
   1124     uprv_strcat(xmlfileName,outputFileName);
   1125     uprv_strcat(xmlfileName,xliffExt);
   1126 
   1127     if (writtenFilename) {
   1128         uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
   1129     }
   1130 
   1131     if (U_FAILURE(*status)) {
   1132         goto cleanup_bundle_write_xml;
   1133     }
   1134 
   1135     out= T_FileStream_open(xmlfileName,"w");
   1136 
   1137     if(out==NULL){
   1138         *status = U_FILE_ACCESS_ERROR;
   1139         goto cleanup_bundle_write_xml;
   1140     }
   1141     write_utf8_file(out, UnicodeString(xmlHeader));
   1142 
   1143     if(outputEnc && *outputEnc!='\0'){
   1144         /* store the output encoding */
   1145         enc = outputEnc;
   1146         conv=ucnv_open(enc,status);
   1147         if(U_FAILURE(*status)){
   1148             goto cleanup_bundle_write_xml;
   1149         }
   1150     }
   1151     write_utf8_file(out, UnicodeString(bundleStart));
   1152     write_tabs(out);
   1153     write_utf8_file(out, UnicodeString(fileStart));
   1154     /* check if lang and language are the same */
   1155     if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
   1156         fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
   1157     }
   1158     write_utf8_file(out, UnicodeString(lang));
   1159     write_utf8_file(out, UnicodeString(file1));
   1160     write_utf8_file(out, UnicodeString(file2));
   1161     write_utf8_file(out, UnicodeString(originalFileName));
   1162     write_utf8_file(out, UnicodeString(file4));
   1163 
   1164     time(&currTime);
   1165     strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
   1166     write_utf8_file(out, UnicodeString(timeBuf));
   1167     write_utf8_file(out, UnicodeString("\">\n"));
   1168 
   1169     tabCount += 1;
   1170     write_tabs(out);
   1171     write_utf8_file(out, UnicodeString(headerStart));
   1172 
   1173     tabCount += 1;
   1174     write_tabs(out);
   1175 
   1176     write_utf8_file(out, UnicodeString(tool_start));
   1177     printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
   1178     printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
   1179     write_utf8_file(out, UnicodeString("/>\n"));
   1180 
   1181     tabCount -= 1;
   1182     write_tabs(out);
   1183 
   1184     write_utf8_file(out, UnicodeString(headerEnd));
   1185 
   1186     write_tabs(out);
   1187     tabCount += 1;
   1188 
   1189     write_utf8_file(out, UnicodeString(bodyStart));
   1190 
   1191 
   1192     res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
   1193 
   1194     tabCount -= 1;
   1195     write_tabs(out);
   1196 
   1197     write_utf8_file(out, UnicodeString(bodyEnd));
   1198     tabCount--;
   1199     write_tabs(out);
   1200     write_utf8_file(out, UnicodeString(fileEnd));
   1201     tabCount--;
   1202     write_tabs(out);
   1203     write_utf8_file(out, UnicodeString(bundleEnd));
   1204     T_FileStream_close(out);
   1205 
   1206     ucnv_close(conv);
   1207 
   1208 cleanup_bundle_write_xml:
   1209     uprv_free(originalFileName);
   1210     uprv_free(lang);
   1211     if(xmlfileName != NULL) {
   1212         uprv_free(xmlfileName);
   1213     }
   1214     if(outputFileName != NULL){
   1215         uprv_free(outputFileName);
   1216     }
   1217 }
   1218