1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2002-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * File wrtxml.cpp 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 10/01/02 Ram Creation. 17 * 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform 18 * 19 ******************************************************************************* 20 */ 21 22 // Safer use of UnicodeString. 23 #ifndef UNISTR_FROM_CHAR_EXPLICIT 24 # define UNISTR_FROM_CHAR_EXPLICIT explicit 25 #endif 26 27 // Less important, but still a good idea. 28 #ifndef UNISTR_FROM_STRING_EXPLICIT 29 # define UNISTR_FROM_STRING_EXPLICIT explicit 30 #endif 31 32 #include "reslist.h" 33 #include "unewdata.h" 34 #include "unicode/ures.h" 35 #include "errmsg.h" 36 #include "filestrm.h" 37 #include "cstring.h" 38 #include "unicode/ucnv.h" 39 #include "genrb.h" 40 #include "rle.h" 41 #include "uhash.h" 42 #include "uresimp.h" 43 #include "unicode/ustring.h" 44 #include "unicode/uchar.h" 45 #include "ustr.h" 46 #include "prscmnts.h" 47 #include "unicode/unistr.h" 48 #include "unicode/utf8.h" 49 #include "unicode/utf16.h" 50 #include <time.h> 51 52 U_NAMESPACE_USE 53 54 static int tabCount = 0; 55 56 static FileStream* out=NULL; 57 static struct SRBRoot* srBundle ; 58 static const char* outDir = NULL; 59 static const char* enc =""; 60 static UConverter* conv = NULL; 61 62 const char* const* ISOLanguages; 63 const char* const* ISOCountries; 64 const char* textExt = ".txt"; 65 const char* xliffExt = ".xlf"; 66 67 static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString) 68 { 69 UErrorCode status = U_ZERO_ERROR; 70 int32_t len = 0; 71 72 // preflight to get the 
destination buffer size 73 u_strToUTF8(NULL, 74 0, 75 &len, 76 toUCharPtr(outString.getBuffer()), 77 outString.length(), 78 &status); 79 80 // allocate the buffer 81 char* dest = (char*)uprv_malloc(len); 82 status = U_ZERO_ERROR; 83 84 // convert the data 85 u_strToUTF8(dest, 86 len, 87 &len, 88 toUCharPtr(outString.getBuffer()), 89 outString.length(), 90 &status); 91 92 // write data to out file 93 int32_t ret = T_FileStream_write(fileStream, dest, len); 94 uprv_free(dest); 95 return (ret); 96 } 97 98 /*write indentation for formatting*/ 99 static void write_tabs(FileStream* os){ 100 int i=0; 101 for(;i<=tabCount;i++){ 102 write_utf8_file(os,UnicodeString(" ")); 103 } 104 } 105 106 /*get ID for each element. ID is globally unique.*/ 107 static char* getID(const char* id, const char* curKey, char* result) { 108 if(curKey == NULL) { 109 result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1); 110 uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1); 111 uprv_strcpy(result, id); 112 } else { 113 result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1); 114 uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1); 115 if(id[0]!='\0'){ 116 uprv_strcpy(result, id); 117 uprv_strcat(result, "_"); 118 } 119 uprv_strcat(result, curKey); 120 } 121 return result; 122 } 123 124 /*compute CRC for binary code*/ 125 /* The code is from http://www.theorem.com/java/CRC32.java 126 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check 127 * <P> This check is used in numerous systems to verify the integrity 128 * of information. It's also used as a hashing function. Unlike a regular 129 * checksum, it's sensitive to the order of the characters. 
/*compute CRC for binary code*/
/* The code is from http://www.theorem.com/java/CRC32.java
 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
 * <P> This check is used in numerous systems to verify the integrity
 * of information.  It's also used as a hashing function.  Unlike a regular
 * checksum, it's sensitive to the order of the characters.
 * It produces a 32 bit
 *
 * @author Michael Lecuyer (mjl (at) theorem.com)
 * @version 1.1 August 11, 1998
 */

/* ICU is not endian portable, because ICU data generated on big endian machines can be
 * ported to big endian machines but not to little endian machines and vice versa. The
 * conversion is not portable across platforms with different endianness.
 */

#define CRC32_POLYNOMIAL 0xEDB88320

/**
 * Folds len bytes starting at ptr into a running CRC-32 (reflected
 * polynomial 0xEDB88320). No initial or final inversion is applied here:
 * the caller supplies the start value in lastcrc and may chain calls by
 * feeding the previous result back in.
 *
 * The update is done bit by bit, which yields exactly the same value as a
 * 256-entry lookup table but needs no table to be built on each call, and
 * keeps all arithmetic unsigned.
 *
 * @param ptr     bytes to process
 * @param len     number of bytes at ptr
 * @param lastcrc CRC state to continue from
 * @return the updated CRC state
 */
uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
    uint32_t crc = lastcrc;

    while(len--!=0) {
        int bit;
        /* go through uint8_t so that bytes >= 0x80 are not sign-extended */
        crc ^= (uint8_t)*ptr++;
        for (bit = 0; bit < 8; bit++) {
            if ((crc & 1) == 1) {
                crc = (crc >> 1) ^ CRC32_POLYNOMIAL;
            } else {
                crc >>= 1;
            }
        }
    }
    return(crc);
}

/**
 * Replaces every occurrence of s by r in the first srcLen chars of src.
 * Does nothing when src is NULL.
 */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    int32_t i = 0;
    if(src == NULL){
        return;
    }
    for(i=0;i<srcLen;i++){
        if(src[i]==s){
            src[i]=r;
        }
    }
}

185 * If it fails to get the language information from the filename, 186 * use "en" as the default value for language 187 */ 188 static char* parseFilename(const char* id, char* /*lang*/) { 189 int idLen = (int) uprv_strlen(id); 190 char* localeID = (char*) uprv_malloc(idLen); 191 int pos = 0; 192 int canonCapacity = 0; 193 char* canon = NULL; 194 int canonLen = 0; 195 /*int i;*/ 196 UErrorCode status = U_ZERO_ERROR; 197 const char *ext = uprv_strchr(id, '.'); 198 199 if(ext != NULL){ 200 pos = (int) (ext - id); 201 } else { 202 pos = idLen; 203 } 204 uprv_memcpy(localeID, id, pos); 205 localeID[pos]=0; /* NUL terminate the string */ 206 207 canonCapacity =pos*3; 208 canon = (char*) uprv_malloc(canonCapacity); 209 canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status); 210 211 if(U_FAILURE(status)){ 212 fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status)); 213 exit(status); 214 } 215 strnrepchr(canon, canonLen, '_', '-'); 216 return canon; 217 } 218 219 static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"; 220 #if 0 221 static const char* bundleStart = "<xliff version = \"1.2\" " 222 "xmlns='urn:oasis:names:tc:xliff:document:1.2' " 223 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' " 224 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n"; 225 #else 226 static const char* bundleStart = "<xliff version = \"1.1\" " 227 "xmlns='urn:oasis:names:tc:xliff:document:1.1' " 228 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' " 229 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n"; 230 #endif 231 static const char* bundleEnd = "</xliff>\n"; 232 233 void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status); 234 235 static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* 
destLength, 236 const UChar* src, int32_t srcLen, UErrorCode* status){ 237 int32_t srcIndex=0; 238 char* dest=NULL; 239 char* temp=NULL; 240 int32_t destLen=0; 241 UChar32 c = 0; 242 243 if(status==NULL || U_FAILURE(*status) || pDest==NULL || srcLen==0 || src == NULL){ 244 return NULL; 245 } 246 dest =*pDest; 247 if(dest==NULL || destCap <=0){ 248 destCap = srcLen * 8; 249 dest = (char*) uprv_malloc(sizeof(char) * destCap); 250 if(dest==NULL){ 251 *status=U_MEMORY_ALLOCATION_ERROR; 252 return NULL; 253 } 254 } 255 256 dest[0]=0; 257 258 while(srcIndex<srcLen){ 259 U16_NEXT(src, srcIndex, srcLen, c); 260 261 if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) { 262 *status = U_ILLEGAL_CHAR_FOUND; 263 fprintf(stderr, "Illegal Surrogate! \n"); 264 uprv_free(dest); 265 return NULL; 266 } 267 268 if((destLen+U8_LENGTH(c)) < destCap){ 269 270 /* ASCII Range */ 271 if(c <=0x007F){ 272 switch(c) { 273 case '\x26': 274 uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &*/ 275 destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b"); 276 break; 277 case '\x3c': 278 uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* <*/ 279 destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b"); 280 break; 281 case '\x3e': 282 uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* >*/ 283 destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b"); 284 break; 285 case '\x22': 286 uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* "*/ 287 destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b"); 288 break; 289 case '\x27': 290 uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* ' */ 291 destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b"); 292 break; 293 294 /* Disallow C0 controls except TAB, CR, LF*/ 295 case 0x00: 296 case 0x01: 297 case 0x02: 298 case 0x03: 299 case 0x04: 300 case 0x05: 301 case 0x06: 302 case 0x07: 303 case 0x08: 304 /*case 0x09:*/ 305 /*case 0x0A: */ 306 case 0x0B: 307 case 0x0C: 308 /*case 0x0D:*/ 309 case 0x0E: 310 case 0x0F: 311 case 0x10: 312 case 0x11: 313 case 0x12: 314 
case 0x13: 315 case 0x14: 316 case 0x15: 317 case 0x16: 318 case 0x17: 319 case 0x18: 320 case 0x19: 321 case 0x1A: 322 case 0x1B: 323 case 0x1C: 324 case 0x1D: 325 case 0x1E: 326 case 0x1F: 327 *status = U_ILLEGAL_CHAR_FOUND; 328 fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c); 329 uprv_free(dest); 330 return NULL; 331 default: 332 dest[destLen++]=(char)c; 333 } 334 }else{ 335 UBool isError = FALSE; 336 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError); 337 if(isError){ 338 *status = U_ILLEGAL_CHAR_FOUND; 339 fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c); 340 uprv_free(dest); 341 return NULL; 342 } 343 } 344 }else{ 345 destCap += destLen; 346 347 temp = (char*) uprv_malloc(sizeof(char)*destCap); 348 if(temp==NULL){ 349 *status=U_MEMORY_ALLOCATION_ERROR; 350 uprv_free(dest); 351 return NULL; 352 } 353 uprv_memmove(temp,dest,destLen); 354 destLen=0; 355 uprv_free(dest); 356 dest=temp; 357 temp=NULL; 358 } 359 360 } 361 *destLength = destLen; 362 return dest; 363 } 364 365 #define ASTERISK 0x002A 366 #define SPACE 0x0020 367 #define CR 0x000A 368 #define LF 0x000D 369 #define AT_SIGN 0x0040 370 371 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 372 static void 373 trim(char **src, int32_t *len){ 374 375 char *s = NULL; 376 int32_t i = 0; 377 if(src == NULL || *src == NULL){ 378 return; 379 } 380 s = *src; 381 /* trim from the end */ 382 for( i=(*len-1); i>= 0; i--){ 383 switch(s[i]){ 384 case ASTERISK: 385 case SPACE: 386 case CR: 387 case LF: 388 s[i] = 0; 389 continue; 390 default: 391 break; 392 } 393 break; 394 395 } 396 *len = i+1; 397 } 398 399 static void 400 print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd, UErrorCode *status){ 401 int32_t bufCapacity = srcLen*4; 402 char *buf = NULL; 403 int32_t bufLen = 0; 404 405 if(U_FAILURE(*status)){ 406 return; 407 } 408 409 buf = (char*) (uprv_malloc(bufCapacity)); 410 if(buf==0){ 411 fprintf(stderr, "Could not allocate memory!!"); 412 exit(U_MEMORY_ALLOCATION_ERROR); 413 } 414 
buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status); 415 if(U_SUCCESS(*status)){ 416 trim(&buf,&bufLen); 417 write_utf8_file(out,UnicodeString(tagStart)); 418 write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8")); 419 write_utf8_file(out,UnicodeString(tagEnd)); 420 write_utf8_file(out,UnicodeString("\n")); 421 422 } 423 } 424 #endif 425 426 static void 427 printNoteElements(const UString *src, UErrorCode *status){ 428 429 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */ 430 431 int32_t capacity = 0; 432 UChar* note = NULL; 433 int32_t noteLen = 0; 434 int32_t count = 0,i; 435 436 if(src == NULL){ 437 return; 438 } 439 440 capacity = src->fLength; 441 note = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity); 442 443 count = getCount(src->fChars,src->fLength, UPC_NOTE, status); 444 if(U_FAILURE(*status)){ 445 uprv_free(note); 446 return; 447 } 448 for(i=0; i < count; i++){ 449 noteLen = getAt(src->fChars,src->fLength, ¬e, capacity, i, UPC_NOTE, status); 450 if(U_FAILURE(*status)){ 451 uprv_free(note); 452 return; 453 } 454 if(noteLen > 0){ 455 write_tabs(out); 456 print(note, noteLen,"<note>", "</note>", status); 457 } 458 } 459 uprv_free(note); 460 #else 461 462 fprintf(stderr, "Warning: Could not output comments to XLIFF file. 
ICU has been built without RegularExpression support.\n"); 463 464 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 465 466 } 467 468 static void printAttribute(const char *name, const char *value, int32_t /*len*/) 469 { 470 write_utf8_file(out, UnicodeString(" ")); 471 write_utf8_file(out, UnicodeString(name)); 472 write_utf8_file(out, UnicodeString(" = \"")); 473 write_utf8_file(out, UnicodeString(value)); 474 write_utf8_file(out, UnicodeString("\"")); 475 } 476 477 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */ 478 static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/) 479 { 480 write_utf8_file(out, UnicodeString(" ")); 481 write_utf8_file(out, UnicodeString(name)); 482 write_utf8_file(out, UnicodeString(" = \"")); 483 write_utf8_file(out, value); 484 write_utf8_file(out, UnicodeString("\"")); 485 } 486 #endif 487 488 static void 489 printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){ 490 491 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */ 492 493 if(status==NULL || U_FAILURE(*status)){ 494 return; 495 } 496 497 int32_t capacity = src->fLength + 1; 498 char* buf = NULL; 499 int32_t bufLen = 0; 500 UChar* desc = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity); 501 UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity); 502 503 int32_t descLen = 0, transLen=0; 504 if(desc==NULL || trans==NULL){ 505 *status = U_MEMORY_ALLOCATION_ERROR; 506 uprv_free(desc); 507 uprv_free(trans); 508 return; 509 } 510 // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc. 
511 src->fLength = removeCmtText(src->fChars, src->fLength, status); 512 descLen = getDescription(src->fChars,src->fLength, &desc, capacity, status); 513 transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status); 514 515 /* first print translate attribute */ 516 if(transLen > 0){ 517 if(printTranslate){ 518 /* print translate attribute */ 519 buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status); 520 if(U_SUCCESS(*status)){ 521 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen); 522 write_utf8_file(out,UnicodeString(">\n")); 523 } 524 }else if(getShowWarning()){ 525 fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName); 526 /* no translate attribute .. just close the tag */ 527 write_utf8_file(out,UnicodeString(">\n")); 528 } 529 }else{ 530 /* no translate attribute .. just close the tag */ 531 write_utf8_file(out,UnicodeString(">\n")); 532 } 533 534 if(descLen > 0){ 535 write_tabs(out); 536 print(desc, descLen, "<!--", "-->", status); 537 } 538 539 uprv_free(desc); 540 uprv_free(trans); 541 #else 542 543 fprintf(stderr, "Warning: Could not output comments to XLIFF file. 
ICU has been built without RegularExpression support.\n"); 544 545 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ 546 547 } 548 549 /* 550 * Print out a containing element, like: 551 * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no"> 552 * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array"> 553 */ 554 static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status) 555 { 556 const char *resname = NULL; 557 char *sid = NULL; 558 559 write_tabs(out); 560 561 resname = res->getKeyString(srBundle); 562 if (resname != NULL && *resname != 0) { 563 sid = getID(id, resname, sid); 564 } else { 565 sid = getID(id, NULL, sid); 566 } 567 568 write_utf8_file(out, UnicodeString("<")); 569 write_utf8_file(out, UnicodeString(container)); 570 printAttribute("id", sid, (int32_t) uprv_strlen(sid)); 571 572 if (resname != NULL) { 573 printAttribute("resname", resname, (int32_t) uprv_strlen(resname)); 574 } 575 576 if (mimetype != NULL) { 577 printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype)); 578 } 579 580 if (restype != NULL) { 581 printAttribute("restype", restype, (int32_t) uprv_strlen(restype)); 582 } 583 584 tabCount += 1; 585 if (res->fComment.fLength > 0) { 586 /* printComments will print the closing ">\n" */ 587 printComments(&res->fComment, resname, TRUE, status); 588 } else { 589 write_utf8_file(out, UnicodeString(">\n")); 590 } 591 592 return sid; 593 } 594 595 /* Writing Functions */ 596 597 static const char *trans_unit = "trans-unit"; 598 static const char *close_trans_unit = "</trans-unit>\n"; 599 static const char *source = "<source>"; 600 static const char *close_source = "</source>\n"; 601 static const char *group = "group"; 602 static const char *close_group = "</group>\n"; 603 604 static const char *bin_unit = "bin-unit"; 605 static const char *close_bin_unit = "</bin-unit>\n"; 606 static const char *bin_source 
= "<bin-source>\n"; 607 static const char *close_bin_source = "</bin-source>\n"; 608 static const char *external_file = "<external-file"; 609 /*static const char *close_external_file = "</external-file>\n";*/ 610 static const char *internal_file = "<internal-file"; 611 static const char *close_internal_file = "</internal-file>\n"; 612 613 static const char *application_mimetype = "application"; /* add "/octet-stream"? */ 614 615 static const char *alias_restype = "x-icu-alias"; 616 static const char *array_restype = "x-icu-array"; 617 static const char *binary_restype = "x-icu-binary"; 618 static const char *integer_restype = "x-icu-integer"; 619 static const char *intvector_restype = "x-icu-intvector"; 620 static const char *table_restype = "x-icu-table"; 621 622 static void 623 string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) { 624 625 char *sid = NULL; 626 char* buf = NULL; 627 int32_t bufLen = 0; 628 629 if(status==NULL || U_FAILURE(*status)){ 630 return; 631 } 632 633 sid = printContainer(res, trans_unit, NULL, NULL, id, status); 634 635 write_tabs(out); 636 637 write_utf8_file(out, UnicodeString(source)); 638 639 buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status); 640 641 if (U_FAILURE(*status)) { 642 return; 643 } 644 645 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8")); 646 write_utf8_file(out, UnicodeString(close_source)); 647 648 printNoteElements(&res->fComment, status); 649 650 tabCount -= 1; 651 write_tabs(out); 652 653 write_utf8_file(out, UnicodeString(close_trans_unit)); 654 655 uprv_free(buf); 656 uprv_free(sid); 657 } 658 659 static void 660 alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) { 661 char *sid = NULL; 662 char* buf = NULL; 663 int32_t bufLen=0; 664 665 sid = printContainer(res, trans_unit, alias_restype, NULL, id, status); 666 667 write_tabs(out); 668 669 write_utf8_file(out, 
UnicodeString(source)); 670 671 buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status); 672 673 if(U_FAILURE(*status)){ 674 return; 675 } 676 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8")); 677 write_utf8_file(out, UnicodeString(close_source)); 678 679 printNoteElements(&res->fComment, status); 680 681 tabCount -= 1; 682 write_tabs(out); 683 684 write_utf8_file(out, UnicodeString(close_trans_unit)); 685 686 uprv_free(buf); 687 uprv_free(sid); 688 } 689 690 static void 691 array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) { 692 char* sid = NULL; 693 int index = 0; 694 695 struct SResource *current = NULL; 696 697 sid = printContainer(res, group, array_restype, NULL, id, status); 698 699 current = res->fFirst; 700 701 while (current != NULL) { 702 char c[256] = {0}; 703 char* subId = NULL; 704 705 itostr(c, index, 10, 0); 706 index += 1; 707 subId = getID(sid, c, subId); 708 709 res_write_xml(current, subId, language, FALSE, status); 710 uprv_free(subId); 711 subId = NULL; 712 713 if(U_FAILURE(*status)){ 714 return; 715 } 716 717 current = current->fNext; 718 } 719 720 tabCount -= 1; 721 write_tabs(out); 722 write_utf8_file(out, UnicodeString(close_group)); 723 724 uprv_free(sid); 725 } 726 727 static void 728 intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) { 729 char* sid = NULL; 730 char* ivd = NULL; 731 uint32_t i=0; 732 uint32_t len=0; 733 char buf[256] = {'0'}; 734 735 sid = printContainer(res, group, intvector_restype, NULL, id, status); 736 737 for(i = 0; i < res->fCount; i += 1) { 738 char c[256] = {0}; 739 740 itostr(c, i, 10, 0); 741 ivd = getID(sid, c, ivd); 742 len = itostr(buf, res->fArray[i], 10, 0); 743 744 write_tabs(out); 745 write_utf8_file(out, UnicodeString("<")); 746 write_utf8_file(out, UnicodeString(trans_unit)); 747 748 printAttribute("id", ivd, (int32_t)uprv_strlen(ivd)); 749 printAttribute("restype", 
integer_restype, (int32_t) strlen(integer_restype)); 750 751 write_utf8_file(out, UnicodeString(">\n")); 752 753 tabCount += 1; 754 write_tabs(out); 755 write_utf8_file(out, UnicodeString(source)); 756 757 write_utf8_file(out, UnicodeString(buf, len)); 758 759 write_utf8_file(out, UnicodeString(close_source)); 760 tabCount -= 1; 761 write_tabs(out); 762 write_utf8_file(out, UnicodeString(close_trans_unit)); 763 764 uprv_free(ivd); 765 ivd = NULL; 766 } 767 768 tabCount -= 1; 769 write_tabs(out); 770 771 write_utf8_file(out, UnicodeString(close_group)); 772 uprv_free(sid); 773 sid = NULL; 774 } 775 776 static void 777 int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) { 778 char* sid = NULL; 779 char buf[256] = {0}; 780 uint32_t len = 0; 781 782 sid = printContainer(res, trans_unit, integer_restype, NULL, id, status); 783 784 write_tabs(out); 785 786 write_utf8_file(out, UnicodeString(source)); 787 788 len = itostr(buf, res->fValue, 10, 0); 789 write_utf8_file(out, UnicodeString(buf, len)); 790 791 write_utf8_file(out, UnicodeString(close_source)); 792 793 printNoteElements(&res->fComment, status); 794 795 tabCount -= 1; 796 write_tabs(out); 797 798 write_utf8_file(out, UnicodeString(close_trans_unit)); 799 800 uprv_free(sid); 801 sid = NULL; 802 } 803 804 static void 805 bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) { 806 const char* m_type = application_mimetype; 807 char* sid = NULL; 808 uint32_t crc = 0xFFFFFFFF; 809 810 char fileName[1024] ={0}; 811 int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir); 812 char* fn = (char*) uprv_malloc(sizeof(char) * (tLen+1024 + 813 (res->fFileName !=NULL ? 
814 uprv_strlen(res->fFileName) :0))); 815 const char* ext = NULL; 816 817 char* f = NULL; 818 819 fn[0]=0; 820 821 if(res->fFileName != NULL){ 822 uprv_strcpy(fileName, res->fFileName); 823 f = uprv_strrchr(fileName, '\\'); 824 825 if (f != NULL) { 826 f++; 827 } else { 828 f = fileName; 829 } 830 831 ext = uprv_strrchr(fileName, '.'); 832 833 if (ext == NULL) { 834 fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName); 835 exit(U_ILLEGAL_ARGUMENT_ERROR); 836 } 837 838 if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){ 839 m_type = "image"; 840 } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){ 841 m_type = "audio"; 842 } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){ 843 m_type = "video"; 844 } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){ 845 m_type = "text"; 846 } 847 848 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status); 849 850 write_tabs(out); 851 852 write_utf8_file(out, UnicodeString(bin_source)); 853 854 tabCount+= 1; 855 write_tabs(out); 856 857 write_utf8_file(out, UnicodeString(external_file)); 858 printAttribute("href", f, (int32_t)uprv_strlen(f)); 859 write_utf8_file(out, UnicodeString("/>\n")); 860 tabCount -= 1; 861 write_tabs(out); 862 863 write_utf8_file(out, UnicodeString(close_bin_source)); 864 865 printNoteElements(&res->fComment, status); 866 tabCount -= 1; 867 write_tabs(out); 868 write_utf8_file(out, UnicodeString(close_bin_unit)); 869 } else { 870 char temp[256] = {0}; 871 uint32_t i = 0; 872 int32_t len=0; 873 874 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status); 875 876 write_tabs(out); 877 write_utf8_file(out, UnicodeString(bin_source)); 878 879 tabCount += 1; 880 write_tabs(out); 881 882 write_utf8_file(out, UnicodeString(internal_file)); 883 printAttribute("form", application_mimetype, (int32_t) 
uprv_strlen(application_mimetype)); 884 885 while(i <res->fLength){ 886 len = itostr(temp, res->fData[i], 16, 2); 887 crc = computeCRC(temp, len, crc); 888 i++; 889 } 890 891 len = itostr(temp, crc, 10, 0); 892 printAttribute("crc", temp, len); 893 894 write_utf8_file(out, UnicodeString(">")); 895 896 i = 0; 897 while(i <res->fLength){ 898 len = itostr(temp, res->fData[i], 16, 2); 899 write_utf8_file(out, UnicodeString(temp)); 900 i += 1; 901 } 902 903 write_utf8_file(out, UnicodeString(close_internal_file)); 904 905 tabCount -= 2; 906 write_tabs(out); 907 908 write_utf8_file(out, UnicodeString(close_bin_source)); 909 printNoteElements(&res->fComment, status); 910 911 tabCount -= 1; 912 write_tabs(out); 913 write_utf8_file(out, UnicodeString(close_bin_unit)); 914 915 uprv_free(sid); 916 sid = NULL; 917 } 918 919 uprv_free(fn); 920 } 921 922 923 924 static void 925 table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) { 926 927 uint32_t i = 0; 928 929 struct SResource *current = NULL; 930 char* sid = NULL; 931 932 if (U_FAILURE(*status)) { 933 return ; 934 } 935 936 sid = printContainer(res, group, table_restype, NULL, id, status); 937 938 if(isTopLevel) { 939 sid[0] = '\0'; 940 } 941 942 current = res->fFirst; 943 i = 0; 944 945 while (current != NULL) { 946 res_write_xml(current, sid, language, FALSE, status); 947 948 if(U_FAILURE(*status)){ 949 return; 950 } 951 952 i += 1; 953 current = current->fNext; 954 } 955 956 tabCount -= 1; 957 write_tabs(out); 958 959 write_utf8_file(out, UnicodeString(close_group)); 960 961 uprv_free(sid); 962 sid = NULL; 963 } 964 965 void 966 res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) { 967 968 if (U_FAILURE(*status)) { 969 return ; 970 } 971 972 if (res != NULL) { 973 switch (res->fType) { 974 case URES_STRING: 975 string_write_xml (static_cast<StringResource *>(res), id, language, status); 976 return; 977 
978 case URES_ALIAS: 979 alias_write_xml (static_cast<AliasResource *>(res), id, language, status); 980 return; 981 982 case URES_INT_VECTOR: 983 intvector_write_xml (static_cast<IntVectorResource *>(res), id, language, status); 984 return; 985 986 case URES_BINARY: 987 bin_write_xml (static_cast<BinaryResource *>(res), id, language, status); 988 return; 989 990 case URES_INT: 991 int_write_xml (static_cast<IntResource *>(res), id, language, status); 992 return; 993 994 case URES_ARRAY: 995 array_write_xml (static_cast<ArrayResource *>(res), id, language, status); 996 return; 997 998 case URES_TABLE: 999 table_write_xml (static_cast<TableResource *>(res), id, language, isTopLevel, status); 1000 return; 1001 1002 default: 1003 break; 1004 } 1005 } 1006 1007 *status = U_INTERNAL_PROGRAM_ERROR; 1008 } 1009 1010 void 1011 bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename, 1012 char *writtenFilename, int writtenFilenameLen, 1013 const char* language, const char* outFileName, UErrorCode *status) { 1014 1015 char* xmlfileName = NULL; 1016 char* outputFileName = NULL; 1017 char* originalFileName = NULL; 1018 const char* fileStart = "<file xml:space = \"preserve\" source-language = \""; 1019 const char* file1 = "\" datatype = \"x-icu-resource-bundle\" "; 1020 const char* file2 = "original = \""; 1021 const char* file4 = "\" date = \""; 1022 const char* fileEnd = "</file>\n"; 1023 const char* headerStart = "<header>\n"; 1024 const char* headerEnd = "</header>\n"; 1025 const char* bodyStart = "<body>\n"; 1026 const char* bodyEnd = "</body>\n"; 1027 1028 const char *tool_start = "<tool"; 1029 const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION; 1030 const char *tool_name = "genrb"; 1031 1032 char* temp = NULL; 1033 char* lang = NULL; 1034 const char* pos = NULL; 1035 int32_t first, index; 1036 time_t currTime; 1037 char timeBuf[128]; 1038 1039 outDir = outputDir; 1040 1041 srBundle = bundle; 1042 1043 pos 
= uprv_strrchr(filename, '\\'); 1044 if(pos != NULL) { 1045 first = (int32_t)(pos - filename + 1); 1046 } else { 1047 first = 0; 1048 } 1049 index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first); 1050 originalFileName = (char *)uprv_malloc(sizeof(char)*index+1); 1051 uprv_memset(originalFileName, 0, sizeof(char)*index+1); 1052 uprv_strncpy(originalFileName, filename + first, index); 1053 1054 if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) { 1055 fprintf(stdout, "Warning: The file name is not same as the resource name!\n"); 1056 } 1057 1058 temp = originalFileName; 1059 originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1); 1060 uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1); 1061 uprv_strcat(originalFileName, temp); 1062 uprv_strcat(originalFileName, textExt); 1063 uprv_free(temp); 1064 temp = NULL; 1065 1066 1067 if (language == NULL) { 1068 /* lang = parseFilename(filename, lang); 1069 if (lang == NULL) {*/ 1070 /* now check if locale name is valid or not 1071 * this is to cater for situation where 1072 * pegasusServer.txt contains 1073 * 1074 * en{ 1075 * .. 1076 * } 1077 */ 1078 lang = parseFilename(srBundle->fLocale, lang); 1079 /* 1080 * Neither the file name nor the table name inside the 1081 * txt file contain a valid country and language codes 1082 * throw an error. 1083 * pegasusServer.txt contains 1084 * 1085 * testelements{ 1086 * .... 1087 * } 1088 */ 1089 if(lang==NULL){ 1090 fprintf(stderr, "Error: The file name and table name do not contain a valid language code. 
Please use -l option to specify it.\n"); 1091 exit(U_ILLEGAL_ARGUMENT_ERROR); 1092 } 1093 /* }*/ 1094 } else { 1095 lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1); 1096 uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1); 1097 uprv_strcpy(lang, language); 1098 } 1099 1100 if(outFileName) { 1101 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1); 1102 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1); 1103 uprv_strcpy(outputFileName,outFileName); 1104 } else { 1105 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1); 1106 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1); 1107 uprv_strcpy(outputFileName,srBundle->fLocale); 1108 } 1109 1110 if(outputDir) { 1111 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1); 1112 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1); 1113 } else { 1114 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1); 1115 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1); 1116 } 1117 1118 if(outputDir){ 1119 uprv_strcpy(xmlfileName, outputDir); 1120 if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){ 1121 uprv_strcat(xmlfileName,U_FILE_SEP_STRING); 1122 } 1123 } 1124 uprv_strcat(xmlfileName,outputFileName); 1125 uprv_strcat(xmlfileName,xliffExt); 1126 1127 if (writtenFilename) { 1128 uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen); 1129 } 1130 1131 if (U_FAILURE(*status)) { 1132 goto cleanup_bundle_write_xml; 1133 } 1134 1135 out= T_FileStream_open(xmlfileName,"w"); 1136 1137 if(out==NULL){ 1138 *status = U_FILE_ACCESS_ERROR; 1139 goto cleanup_bundle_write_xml; 1140 } 1141 write_utf8_file(out, UnicodeString(xmlHeader)); 1142 
1143 if(outputEnc && *outputEnc!='\0'){ 1144 /* store the output encoding */ 1145 enc = outputEnc; 1146 conv=ucnv_open(enc,status); 1147 if(U_FAILURE(*status)){ 1148 goto cleanup_bundle_write_xml; 1149 } 1150 } 1151 write_utf8_file(out, UnicodeString(bundleStart)); 1152 write_tabs(out); 1153 write_utf8_file(out, UnicodeString(fileStart)); 1154 /* check if lang and language are the same */ 1155 if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){ 1156 fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n"); 1157 } 1158 write_utf8_file(out, UnicodeString(lang)); 1159 write_utf8_file(out, UnicodeString(file1)); 1160 write_utf8_file(out, UnicodeString(file2)); 1161 write_utf8_file(out, UnicodeString(originalFileName)); 1162 write_utf8_file(out, UnicodeString(file4)); 1163 1164 time(&currTime); 1165 strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime)); 1166 write_utf8_file(out, UnicodeString(timeBuf)); 1167 write_utf8_file(out, UnicodeString("\">\n")); 1168 1169 tabCount += 1; 1170 write_tabs(out); 1171 write_utf8_file(out, UnicodeString(headerStart)); 1172 1173 tabCount += 1; 1174 write_tabs(out); 1175 1176 write_utf8_file(out, UnicodeString(tool_start)); 1177 printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id)); 1178 printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name)); 1179 write_utf8_file(out, UnicodeString("/>\n")); 1180 1181 tabCount -= 1; 1182 write_tabs(out); 1183 1184 write_utf8_file(out, UnicodeString(headerEnd)); 1185 1186 write_tabs(out); 1187 tabCount += 1; 1188 1189 write_utf8_file(out, UnicodeString(bodyStart)); 1190 1191 1192 res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status); 1193 1194 tabCount -= 1; 1195 write_tabs(out); 1196 1197 write_utf8_file(out, UnicodeString(bodyEnd)); 1198 tabCount--; 1199 write_tabs(out); 1200 write_utf8_file(out, UnicodeString(fileEnd)); 1201 tabCount--; 1202 
write_tabs(out); 1203 write_utf8_file(out, UnicodeString(bundleEnd)); 1204 T_FileStream_close(out); 1205 1206 ucnv_close(conv); 1207 1208 cleanup_bundle_write_xml: 1209 uprv_free(originalFileName); 1210 uprv_free(lang); 1211 if(xmlfileName != NULL) { 1212 uprv_free(xmlfileName); 1213 } 1214 if(outputFileName != NULL){ 1215 uprv_free(outputFileName); 1216 } 1217 } 1218