1 #include "SourcePos.h" 2 #include "ValuesFile.h" 3 #include "XLIFFFile.h" 4 #include "Perforce.h" 5 #include "merge_res_and_xliff.h" 6 #include "localize.h" 7 #include "file_utils.h" 8 #include "res_check.h" 9 #include "xmb.h" 10 11 #include <host/pseudolocalize.h> 12 13 #include <stdlib.h> 14 #include <stdarg.h> 15 #include <sstream> 16 #include <stdio.h> 17 #include <string.h> 18 #include <stdlib.h> 19 20 using namespace std; 21 22 FILE* g_logFile = NULL; 23 24 int test(); 25 26 int 27 read_settings(const string& filename, map<string,Settings>* result, const string& rootDir) 28 { 29 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY); 30 if (root == NULL) { 31 SourcePos(filename, -1).Error("Error reading file."); 32 return 1; 33 } 34 35 // <configuration> 36 vector<XMLNode*> configNodes = root->GetElementsByName("", "configuration"); 37 const size_t I = configNodes.size(); 38 for (size_t i=0; i<I; i++) { 39 const XMLNode* configNode = configNodes[i]; 40 41 Settings settings; 42 settings.id = configNode->GetAttribute("", "id", ""); 43 if (settings.id == "") { 44 configNode->Position().Error("<configuration> needs an id attribute."); 45 delete root; 46 return 1; 47 } 48 49 settings.oldVersion = configNode->GetAttribute("", "old-cl", ""); 50 51 settings.currentVersion = configNode->GetAttribute("", "new-cl", ""); 52 if (settings.currentVersion == "") { 53 configNode->Position().Error("<configuration> needs a new-cl attribute."); 54 delete root; 55 return 1; 56 } 57 58 // <app> 59 vector<XMLNode*> appNodes = configNode->GetElementsByName("", "app"); 60 61 const size_t J = appNodes.size(); 62 for (size_t j=0; j<J; j++) { 63 const XMLNode* appNode = appNodes[j]; 64 65 string dir = appNode->GetAttribute("", "dir", ""); 66 if (dir == "") { 67 appNode->Position().Error("<app> needs a dir attribute."); 68 delete root; 69 return 1; 70 } 71 72 settings.apps.push_back(dir); 73 } 74 75 // <reject> 76 vector<XMLNode*> rejectNodes = configNode->GetElementsByName("", "reject"); 77 78 const size_t K = rejectNodes.size(); 79 for (size_t k=0; k<K; k++) { 80 const XMLNode* rejectNode = rejectNodes[k]; 81 82 Reject reject; 83 84 reject.file = rejectNode->GetAttribute("", "file", ""); 85 if (reject.file == "") { 86 rejectNode->Position().Error("<reject> needs a file attribute."); 87 delete root; 88 return 1; 89 } 90 string f = reject.file; 91 reject.file = rootDir; 92 reject.file += '/'; 93 reject.file += f; 94 95 reject.name = rejectNode->GetAttribute("", "name", ""); 96 if (reject.name == "") { 97 rejectNode->Position().Error("<reject> needs a name attribute."); 98 delete root; 99 return 1; 100 } 101 102 reject.comment = trim_string(rejectNode->CollapseTextContents()); 103 104 settings.reject.push_back(reject); 105 } 106 107 (*result)[settings.id] = settings; 108 } 109 110 delete root; 111 return 0; 112 } 113 114 115 static void 116 ValuesFile_to_XLIFFFile(const ValuesFile* values, XLIFFFile* xliff, const string& englishFilename) 117 { 118 const set<StringResource>& strings = values->GetStrings(); 119 for (set<StringResource>::const_iterator it=strings.begin(); it!=strings.end(); it++) { 120 StringResource res = *it; 121 res.file = englishFilename; 122 xliff->AddStringResource(res); 123 } 124 } 125 126 static bool 127 contains_reject(const Settings& settings, const string& file, const TransUnit& tu) 128 { 129 const string name = tu.id; 130 const vector<Reject>& reject = settings.reject; 131 const size_t I = reject.size(); 132 for (size_t i=0; i<I; i++) { 133 const Reject& r = reject[i]; 134 if (r.file == file && r.name == name) { 135 return true; 136 } 137 } 138 return false; 139 } 140 141 /** 142 * If it's been rejected, then we keep whatever info we have. 143 * 144 * Implements this truth table: 145 * 146 * S AT AS Keep 147 * ----------------------- 148 * 0 0 0 0 (this case can't happen) 149 * 0 0 1 0 (it was there, never translated, and removed) 150 * 0 1 0 0 (somehow it got translated, but it was removed) 151 * 0 1 1 0 (it was removed after having been translated) 152 * 153 * 1 0 0 1 (it was just added) 154 * 1 0 1 1 (it was added, has been changed, but it never got translated) 155 * 1 1 0 1 (somehow it got translated, but we don't know based on what) 156 * 1 1 1 0/1 (it's in both. 0 if S=AS b/c there's no need to retranslate if they're 157 * the same. 1 if S!=AS because S changed, so it should be retranslated) 158 * 159 * The first four are cases where, whatever happened in the past, the string isn't there 160 * now, so it shouldn't be in the XLIFF file. 161 * 162 * For cases 4 and 5, the string has never been translated, so get it translated. 163 * 164 * For case 6, it's unclear where the translated version came from, so we're conservative 165 * and send it back for them to have another shot at. 166 * 167 * For case 7, we have some data. We have two choices. We could rely on the translator's 168 * translation memory or tools to notice that the strings haven't changed, and populate the 169 * <target> field themselves. Or if the string hasn't changed since last time, we can just 170 * not even tell them about it. As the project nears the end, it will be convenient to see 171 * the xliff files reducing in size, so we pick the latter. Obviously, if the string has 172 * changed, then we need to get it retranslated. 173 */ 174 bool 175 keep_this_trans_unit(const string& file, const TransUnit& unit, void* cookie) 176 { 177 const Settings* settings = reinterpret_cast<const Settings*>(cookie); 178 179 if (contains_reject(*settings, file, unit)) { 180 return true; 181 } 182 183 if (unit.source.id == "") { 184 return false; 185 } 186 if (unit.altTarget.id == "" || unit.altSource.id == "") { 187 return true; 188 } 189 return unit.source.value->ContentsToString(XLIFF_NAMESPACES) 190 != unit.altSource.value->ContentsToString(XLIFF_NAMESPACES); 191 } 192 193 int 194 validate_config(const string& settingsFile, const map<string,Settings>& settings, 195 const string& config) 196 { 197 if (settings.find(config) == settings.end()) { 198 SourcePos(settingsFile, -1).Error("settings file does not contain setting: %s\n", 199 config.c_str()); 200 return 1; 201 } 202 return 0; 203 } 204 205 int 206 validate_configs(const string& settingsFile, const map<string,Settings>& settings, 207 const vector<string>& configs) 208 { 209 int err = 0; 210 for (size_t i=0; i<configs.size(); i++) { 211 string config = configs[i]; 212 err |= validate_config(settingsFile, settings, config); 213 } 214 return err; 215 } 216 217 int 218 select_files(vector<string> *resFiles, const string& config, 219 const map<string,Settings>& settings, const string& rootDir) 220 { 221 int err; 222 vector<vector<string> > allResFiles; 223 vector<string> configs; 224 configs.push_back(config); 225 err = select_files(&allResFiles, configs, settings, rootDir); 226 if (err == 0) { 227 *resFiles = allResFiles[0]; 228 } 229 return err; 230 } 231 232 int 233 select_files(vector<vector<string> > *allResFiles, const vector<string>& configs, 234 const map<string,Settings>& settings, const string& rootDir) 235 { 236 int err; 237 printf("Selecting files..."); 238 fflush(stdout); 239 240 for (size_t i=0; i<configs.size(); i++) { 241 const string& config = configs[i]; 242 const Settings& setting = settings.find(config)->second; 243 244 vector<string> resFiles; 245 err = Perforce::GetResourceFileNames(setting.currentVersion, rootDir, 246 setting.apps, &resFiles, true); 247 if (err != 0) { 248 fprintf(stderr, "error with perforce. bailing\n"); 249 return err; 250 } 251 252 allResFiles->push_back(resFiles); 253 } 254 return 0; 255 } 256 257 static int 258 do_export(const string& settingsFile, const string& rootDir, const string& outDir, 259 const string& targetLocale, const vector<string>& configs) 260 { 261 bool success = true; 262 int err; 263 264 if (false) { 265 printf("settingsFile=%s\n", settingsFile.c_str()); 266 printf("rootDir=%s\n", rootDir.c_str()); 267 printf("outDir=%s\n", outDir.c_str()); 268 for (size_t i=0; i<configs.size(); i++) { 269 printf("config[%zd]=%s\n", i, configs[i].c_str()); 270 } 271 } 272 273 map<string,Settings> settings; 274 err = read_settings(settingsFile, &settings, rootDir); 275 if (err != 0) { 276 return err; 277 } 278 279 err = validate_configs(settingsFile, settings, configs); 280 if (err != 0) { 281 return err; 282 } 283 284 vector<vector<string> > allResFiles; 285 err = select_files(&allResFiles, configs, settings, rootDir); 286 if (err != 0) { 287 return err; 288 } 289 290 size_t totalFileCount = 0; 291 for (size_t i=0; i<allResFiles.size(); i++) { 292 totalFileCount += allResFiles[i].size(); 293 } 294 totalFileCount *= 3; // we try all 3 versions of the file 295 296 size_t fileProgress = 0; 297 vector<Stats> stats; 298 vector<pair<string,XLIFFFile*> > xliffs; 299 300 for (size_t i=0; i<configs.size(); i++) { 301 const string& config = configs[i]; 302 const Settings& setting = settings[config]; 303 304 if (false) { 305 fprintf(stderr, "Configuration: %s (%zd of %zd)\n", config.c_str(), i+1, 306 configs.size()); 307 fprintf(stderr, " Old CL: %s\n", setting.oldVersion.c_str()); 308 fprintf(stderr, " Current CL: %s\n", setting.currentVersion.c_str()); 309 } 310 311 Configuration english; 312 english.locale = "en_US"; 313 Configuration translated; 314 translated.locale = targetLocale; 315 XLIFFFile* xliff = XLIFFFile::Create(english, translated, setting.currentVersion); 316 317 const vector<string>& resFiles = allResFiles[i]; 318 const size_t J = resFiles.size(); 319 for (size_t j=0; j<J; j++) { 320 string resFile = resFiles[j]; 321 322 // parse the files into a ValuesFile 323 // pull out the strings and add them to the XLIFFFile 324 325 // current file 326 print_file_status(++fileProgress, totalFileCount); 327 ValuesFile* currentFile = get_values_file(resFile, english, CURRENT_VERSION, 328 setting.currentVersion, true); 329 if (currentFile != NULL) { 330 ValuesFile_to_XLIFFFile(currentFile, xliff, resFile); 331 //printf("currentFile=[%s]\n", currentFile->ToString().c_str()); 332 } else { 333 fprintf(stderr, "error reading file %s@%s\n", resFile.c_str(), 334 setting.currentVersion.c_str()); 335 success = false; 336 } 337 338 // old file 339 print_file_status(++fileProgress, totalFileCount); 340 ValuesFile* oldFile = get_values_file(resFile, english, OLD_VERSION, 341 setting.oldVersion, false); 342 if (oldFile != NULL) { 343 ValuesFile_to_XLIFFFile(oldFile, xliff, resFile); 344 //printf("oldFile=[%s]\n", oldFile->ToString().c_str()); 345 } 346 347 // translated version 348 // (get the head of the tree for the most recent translation, but it's considered 349 // the old one because the "current" one hasn't been made yet, and this goes into 350 // the <alt-trans> tag if necessary 351 print_file_status(++fileProgress, totalFileCount); 352 string transFilename = translated_file_name(resFile, targetLocale); 353 ValuesFile* transFile = get_values_file(transFilename, translated, OLD_VERSION, 354 setting.currentVersion, false); 355 if (transFile != NULL) { 356 ValuesFile_to_XLIFFFile(transFile, xliff, resFile); 357 } 358 359 delete currentFile; 360 delete oldFile; 361 delete transFile; 362 } 363 364 Stats beforeFilterStats = xliff->GetStats(config); 365 366 // run through the XLIFFFile and strip out TransUnits that have identical 367 // old and current source values and are not in the reject list, or just 368 // old values and no source values 369 xliff->Filter(keep_this_trans_unit, (void*)&setting); 370 371 Stats afterFilterStats = xliff->GetStats(config); 372 afterFilterStats.totalStrings = beforeFilterStats.totalStrings; 373 374 // add the reject comments 375 for (vector<Reject>::const_iterator reject = setting.reject.begin(); 376 reject != setting.reject.end(); reject++) { 377 TransUnit* tu = xliff->EditTransUnit(reject->file, reject->name); 378 tu->rejectComment = reject->comment; 379 } 380 381 // config-locale-current_cl.xliff 382 stringstream filename; 383 if (outDir != "") { 384 filename << outDir << '/'; 385 } 386 filename << config << '-' << targetLocale << '-' << setting.currentVersion << ".xliff"; 387 xliffs.push_back(pair<string,XLIFFFile*>(filename.str(), xliff)); 388 389 stats.push_back(afterFilterStats); 390 } 391 392 // today is a good day to die 393 if (!success || SourcePos::HasErrors()) { 394 return 1; 395 } 396 397 // write the XLIFF files 398 printf("\nWriting %zd file%s...\n", xliffs.size(), xliffs.size() == 1 ? "" : "s"); 399 for (vector<pair<string,XLIFFFile*> >::iterator it = xliffs.begin(); it != xliffs.end(); it++) { 400 const string& filename = it->first; 401 XLIFFFile* xliff = it->second; 402 string text = xliff->ToString(); 403 write_to_file(filename, text); 404 } 405 406 // the stats 407 printf("\n" 408 " to without total\n" 409 " config files translate comments strings\n" 410 "-----------------------------------------------------------------------\n"); 411 Stats totals; 412 totals.config = "total"; 413 totals.files = 0; 414 totals.toBeTranslated = 0; 415 totals.noComments = 0; 416 totals.totalStrings = 0; 417 for (vector<Stats>::iterator it=stats.begin(); it!=stats.end(); it++) { 418 string cfg = it->config; 419 if (cfg.length() > 20) { 420 cfg.resize(20); 421 } 422 printf(" %-20s %-9zd %-9zd %-9zd %-19zd\n", cfg.c_str(), it->files, 423 it->toBeTranslated, it->noComments, it->totalStrings); 424 totals.files += it->files; 425 totals.toBeTranslated += it->toBeTranslated; 426 totals.noComments += it->noComments; 427 totals.totalStrings += it->totalStrings; 428 } 429 if (stats.size() > 1) { 430 printf("-----------------------------------------------------------------------\n" 431 " %-20s %-9zd %-9zd %-9zd %-19zd\n", totals.config.c_str(), totals.files, 432 totals.toBeTranslated, totals.noComments, totals.totalStrings); 433 } 434 printf("\n"); 435 return 0; 436 } 437 438 struct PseudolocalizeSettings { 439 XLIFFFile* xliff; 440 bool expand; 441 }; 442 443 444 string 445 pseudolocalize_string(const string& source, const PseudolocalizeSettings* settings) 446 { 447 return pseudolocalize_string(source); 448 } 449 450 static XMLNode* 451 pseudolocalize_xml_node(const XMLNode* source, const PseudolocalizeSettings* settings) 452 { 453 if (source->Type() == XMLNode::TEXT) { 454 return XMLNode::NewText(source->Position(), pseudolocalize_string(source->Text(), settings), 455 source->Pretty()); 456 } else { 457 XMLNode* target; 458 if (source->Namespace() == XLIFF_XMLNS && source->Name() == "g") { 459 // XXX don't translate these 460 target = XMLNode::NewElement(source->Position(), source->Namespace(), 461 source->Name(), source->Attributes(), source->Pretty()); 462 } else { 463 target = XMLNode::NewElement(source->Position(), source->Namespace(), 464 source->Name(), source->Attributes(), source->Pretty()); 465 } 466 467 const vector<XMLNode*>& children = source->Children(); 468 const size_t I = children.size(); 469 for (size_t i=0; i<I; i++) { 470 target->EditChildren().push_back(pseudolocalize_xml_node(children[i], settings)); 471 } 472 473 return target; 474 } 475 } 476 477 void 478 pseudolocalize_trans_unit(const string&file, TransUnit* unit, void* cookie) 479 { 480 const PseudolocalizeSettings* settings = (PseudolocalizeSettings*)cookie; 481 482 const StringResource& source = unit->source; 483 StringResource* target = &unit->target; 484 *target = source; 485 486 target->config = settings->xliff->TargetConfig(); 487 488 delete target->value; 489 target->value = pseudolocalize_xml_node(source.value, settings); 490 } 491 492 int 493 pseudolocalize_xliff(XLIFFFile* xliff, bool expand) 494 { 495 PseudolocalizeSettings settings; 496 497 settings.xliff = xliff; 498 settings.expand = expand; 499 xliff->Map(pseudolocalize_trans_unit, &settings); 500 return 0; 501 } 502 503 static int 504 do_pseudo(const string& infile, const string& outfile, bool expand) 505 { 506 int err; 507 508 XLIFFFile* xliff = XLIFFFile::Parse(infile); 509 if (xliff == NULL) { 510 return 1; 511 } 512 513 pseudolocalize_xliff(xliff, expand); 514 515 err = write_to_file(outfile, xliff->ToString()); 516 517 delete xliff; 518 519 return err; 520 } 521 522 void 523 log_printf(const char *fmt, ...) 524 { 525 int ret; 526 va_list ap; 527 528 if (g_logFile != NULL) { 529 va_start(ap, fmt); 530 ret = vfprintf(g_logFile, fmt, ap); 531 va_end(ap); 532 fflush(g_logFile); 533 } 534 } 535 536 void 537 close_log_file() 538 { 539 if (g_logFile != NULL) { 540 fclose(g_logFile); 541 } 542 } 543 544 void 545 open_log_file(const char* file) 546 { 547 g_logFile = fopen(file, "w"); 548 printf("log file: %s -- %p\n", file, g_logFile); 549 atexit(close_log_file); 550 } 551 552 static int 553 usage() 554 { 555 fprintf(stderr, 556 "usage: localize export OPTIONS CONFIGS...\n" 557 " REQUIRED OPTIONS\n" 558 " --settings SETTINGS The settings file to use. See CONFIGS below.\n" 559 " --root TREE_ROOT The location in Perforce of the files. e.g. //device\n" 560 " --target LOCALE The target locale. See LOCALES below.\n" 561 "\n" 562 " OPTIONAL OPTIONS\n" 563 " --out DIR Directory to put the output files. Defaults to the\n" 564 " current directory if not supplied. Files are\n" 565 " named as follows:\n" 566 " CONFIG-LOCALE-CURRENT_CL.xliff\n" 567 "\n" 568 "\n" 569 "usage: localize import XLIFF_FILE...\n" 570 "\n" 571 "Import a translated XLIFF file back into the tree.\n" 572 "\n" 573 "\n" 574 "usage: localize xlb XMB_FILE VALUES_FILES...\n" 575 "\n" 576 "Read resource files from the tree file and write the corresponding XLB file\n" 577 "\n" 578 "Supply all of the android resource files (values files) to export after that.\n" 579 "\n" 580 "\n" 581 "\n" 582 "CONFIGS\n" 583 "\n" 584 "LOCALES\n" 585 "Locales are specified in the form en_US They will be processed correctly\n" 586 "to locate the resouce files in the tree.\n" 587 "\n" 588 "\n" 589 "usage: localize pseudo OPTIONS INFILE [OUTFILE]\n" 590 " OPTIONAL OPTIONS\n" 591 " --big Pad strings so they get longer.\n" 592 "\n" 593 "Read INFILE, an XLIFF file, and output a pseudotranslated version of that file. If\n" 594 "OUTFILE is specified, the results are written there; otherwise, the results are\n" 595 "written back to INFILE.\n" 596 "\n" 597 "\n" 598 "usage: localize rescheck FILES...\n" 599 "\n" 600 "Reads the base strings and prints warnings about bad resources from the given files.\n" 601 "\n"); 602 return 1; 603 } 604 605 int 606 main(int argc, const char** argv) 607 { 608 //open_log_file("log.txt"); 609 //g_logFile = stdout; 610 611 if (argc == 2 && 0 == strcmp(argv[1], "--test")) { 612 return test(); 613 } 614 615 if (argc < 2) { 616 return usage(); 617 } 618 619 int index = 1; 620 621 if (0 == strcmp("export", argv[index])) { 622 string settingsFile; 623 string rootDir; 624 string outDir; 625 string baseLocale = "en"; 626 string targetLocale; 627 string language, region; 628 vector<string> configs; 629 630 index++; 631 while (index < argc) { 632 if (0 == strcmp("--settings", argv[index])) { 633 settingsFile = argv[index+1]; 634 index += 2; 635 } 636 else if (0 == strcmp("--root", argv[index])) { 637 rootDir = argv[index+1]; 638 index += 2; 639 } 640 else if (0 == strcmp("--out", argv[index])) { 641 outDir = argv[index+1]; 642 index += 2; 643 } 644 else if (0 == strcmp("--target", argv[index])) { 645 targetLocale = argv[index+1]; 646 index += 2; 647 } 648 else if (argv[index][0] == '-') { 649 fprintf(stderr, "unknown argument %s\n", argv[index]); 650 return usage(); 651 } 652 else { 653 break; 654 } 655 } 656 for (; index<argc; index++) { 657 configs.push_back(argv[index]); 658 } 659 660 if (settingsFile == "" || rootDir == "" || configs.size() == 0 || targetLocale == "") { 661 return usage(); 662 } 663 if (!split_locale(targetLocale, &language, ®ion)) { 664 fprintf(stderr, "illegal --target locale: '%s'\n", targetLocale.c_str()); 665 return usage(); 666 } 667 668 669 return do_export(settingsFile, rootDir, outDir, targetLocale, configs); 670 } 671 else if (0 == strcmp("import", argv[index])) { 672 vector<string> xliffFilenames; 673 674 index++; 675 for (; index<argc; index++) { 676 xliffFilenames.push_back(argv[index]); 677 } 678 679 return do_merge(xliffFilenames); 680 } 681 else if (0 == strcmp("xlb", argv[index])) { 682 string outfile; 683 vector<string> resFiles; 684 685 index++; 686 if (argc < index+1) { 687 return usage(); 688 } 689 690 outfile = argv[index]; 691 692 index++; 693 for (; index<argc; index++) { 694 resFiles.push_back(argv[index]); 695 } 696 697 return do_xlb_export(outfile, resFiles); 698 } 699 else if (0 == strcmp("pseudo", argv[index])) { 700 string infile; 701 string outfile; 702 bool big = false; 703 704 index++; 705 while (index < argc) { 706 if (0 == strcmp("--big", argv[index])) { 707 big = true; 708 index += 1; 709 } 710 else if (argv[index][0] == '-') { 711 fprintf(stderr, "unknown argument %s\n", argv[index]); 712 return usage(); 713 } 714 else { 715 break; 716 } 717 } 718 719 if (index == argc-1) { 720 infile = argv[index]; 721 outfile = argv[index]; 722 } 723 else if (index == argc-2) { 724 infile = argv[index]; 725 outfile = argv[index+1]; 726 } 727 else { 728 fprintf(stderr, "unknown argument %s\n", argv[index]); 729 return usage(); 730 } 731 732 return do_pseudo(infile, outfile, big); 733 } 734 else if (0 == strcmp("rescheck", argv[index])) { 735 vector<string> files; 736 737 index++; 738 while (index < argc) { 739 if (argv[index][0] == '-') { 740 fprintf(stderr, "unknown argument %s\n", argv[index]); 741 return usage(); 742 } 743 else { 744 break; 745 } 746 } 747 for (; index<argc; index++) { 748 files.push_back(argv[index]); 749 } 750 751 if (files.size() == 0) { 752 return usage(); 753 } 754 755 return do_rescheck(files); 756 } 757 else { 758 return usage(); 759 } 760 761 if (SourcePos::HasErrors()) { 762 SourcePos::PrintErrors(stderr); 763 return 1; 764 } 765 766 return 0; 767 } 768 769