1 #include "XLIFFFile.h" 2 3 #include <algorithm> 4 #include <sys/time.h> 5 #include <time.h> 6 #include <cstdio> 7 8 const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2"; 9 10 const char *const NS_MAP[] = { 11 "", XLIFF_XMLNS, 12 "xml", XMLNS_XMLNS, 13 NULL, NULL 14 }; 15 16 const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP); 17 18 int 19 XLIFFFile::File::Compare(const XLIFFFile::File& that) const 20 { 21 if (filename != that.filename) { 22 return filename < that.filename ? -1 : 1; 23 } 24 return 0; 25 } 26 27 // ===================================================================================== 28 XLIFFFile::XLIFFFile() 29 { 30 } 31 32 XLIFFFile::~XLIFFFile() 33 { 34 } 35 36 static XMLNode* 37 get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required) 38 { 39 size_t count = parent->CountElementsByName(ns, name); 40 if (count == 1) { 41 return parent->GetElementByNameAt(ns, name, 0); 42 } else { 43 if (required) { 44 SourcePos pos = count == 0 45 ? parent->Position() 46 : parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position(); 47 pos.Error("<%s> elements must contain exactly one <%s> element", 48 parent->Name().c_str(), name.c_str()); 49 } 50 return NULL; 51 } 52 } 53 54 XLIFFFile* 55 XLIFFFile::Parse(const string& filename) 56 { 57 XLIFFFile* result = new XLIFFFile(); 58 59 XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY); 60 if (root == NULL) { 61 return NULL; 62 } 63 64 // <file> 65 vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file"); 66 for (size_t i=0; i<files.size(); i++) { 67 XMLNode* file = files[i]; 68 69 string datatype = file->GetAttribute("", "datatype", ""); 70 string originalFile = file->GetAttribute("", "original", ""); 71 72 Configuration sourceConfig; 73 sourceConfig.locale = file->GetAttribute("", "source-language", ""); 74 result->m_sourceConfig = sourceConfig; 75 76 Configuration targetConfig; 77 targetConfig.locale = file->GetAttribute("", "target-language", ""); 78 result->m_targetConfig = targetConfig; 79 80 result->m_currentVersion = file->GetAttribute("", "build-num", ""); 81 result->m_oldVersion = "old"; 82 83 // <body> 84 XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true); 85 if (body == NULL) continue; 86 87 // <trans-unit> 88 vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit"); 89 for (size_t j=0; j<transUnits.size(); j++) { 90 XMLNode* transUnit = transUnits[j]; 91 92 string rawID = transUnit->GetAttribute("", "id", ""); 93 if (rawID == "") { 94 transUnit->Position().Error("<trans-unit> tag requires an id"); 95 continue; 96 } 97 string id; 98 int index; 99 100 if (!StringResource::ParseTypedID(rawID, &id, &index)) { 101 transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str()); 102 continue; 103 } 104 105 // <source> 106 XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false); 107 if (source != NULL) { 108 XMLNode* node = source->Clone(); 109 node->SetPrettyRecursive(XMLNode::EXACT); 110 result->AddStringResource(StringResource(source->Position(), originalFile, 111 sourceConfig, id, index, node, CURRENT_VERSION, 112 result->m_currentVersion)); 113 } 114 115 // <target> 116 XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false); 117 if (target != NULL) { 118 XMLNode* node = target->Clone(); 119 node->SetPrettyRecursive(XMLNode::EXACT); 120 result->AddStringResource(StringResource(target->Position(), originalFile, 121 targetConfig, id, index, node, CURRENT_VERSION, 122 result->m_currentVersion)); 123 } 124 125 // <alt-trans> 126 XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false); 127 if (altTrans != NULL) { 128 // <source> 129 XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false); 130 if (altSource != NULL) { 131 XMLNode* node = altSource->Clone(); 132 node->SetPrettyRecursive(XMLNode::EXACT); 133 result->AddStringResource(StringResource(altSource->Position(), 134 originalFile, sourceConfig, id, index, node, OLD_VERSION, 135 result->m_oldVersion)); 136 } 137 138 // <target> 139 XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false); 140 if (altTarget != NULL) { 141 XMLNode* node = altTarget->Clone(); 142 node->SetPrettyRecursive(XMLNode::EXACT); 143 result->AddStringResource(StringResource(altTarget->Position(), 144 originalFile, targetConfig, id, index, node, OLD_VERSION, 145 result->m_oldVersion)); 146 } 147 } 148 } 149 } 150 delete root; 151 return result; 152 } 153 154 XLIFFFile* 155 XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig, 156 const string& currentVersion) 157 { 158 XLIFFFile* result = new XLIFFFile(); 159 result->m_sourceConfig = sourceConfig; 160 result->m_targetConfig = targetConfig; 161 result->m_currentVersion = currentVersion; 162 return result; 163 } 164 165 set<string> 166 XLIFFFile::Files() const 167 { 168 set<string> result; 169 for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) { 170 result.insert(f->filename); 171 } 172 return result; 173 } 174 175 void 176 XLIFFFile::AddStringResource(const StringResource& str) 177 { 178 string id = str.TypedID(); 179 180 File* f = NULL; 181 const size_t I = m_files.size(); 182 for (size_t i=0; i<I; i++) { 183 if (m_files[i].filename == str.file) { 184 f = &m_files[i]; 185 break; 186 } 187 } 188 if (f == NULL) { 189 File file; 190 file.filename = str.file; 191 m_files.push_back(file); 192 f = &m_files[I]; 193 } 194 195 const size_t J = f->transUnits.size(); 196 TransUnit* g = NULL; 197 for (size_t j=0; j<J; j++) { 198 if (f->transUnits[j].id == id) { 199 g = &f->transUnits[j]; 200 } 201 } 202 if (g == NULL) { 203 TransUnit group; 204 group.id = id; 205 f->transUnits.push_back(group); 206 g = &f->transUnits[J]; 207 } 208 209 StringResource* res = find_string_res(*g, str); 210 if (res == NULL) { 211 return ; 212 } 213 if (res->id != "") { 214 str.pos.Error("Duplicate string resource: %s", res->id.c_str()); 215 res->pos.Error("Previous definition here"); 216 return ; 217 } 218 *res = str; 219 220 m_strings.insert(str); 221 } 222 223 void 224 XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie) 225 { 226 const size_t I = m_files.size(); 227 for (size_t ix=0, i=I-1; ix<I; ix++, i--) { 228 File& file = m_files[i]; 229 230 const size_t J = file.transUnits.size(); 231 for (size_t jx=0, j=J-1; jx<J; jx++, j--) { 232 TransUnit& tu = file.transUnits[j]; 233 234 bool keep = func(file.filename, tu, cookie); 235 if (!keep) { 236 if (tu.source.id != "") { 237 m_strings.erase(tu.source); 238 } 239 if (tu.target.id != "") { 240 m_strings.erase(tu.target); 241 } 242 if (tu.altSource.id != "") { 243 m_strings.erase(tu.altSource); 244 } 245 if (tu.altTarget.id != "") { 246 m_strings.erase(tu.altTarget); 247 } 248 file.transUnits.erase(file.transUnits.begin()+j); 249 } 250 } 251 if (file.transUnits.size() == 0) { 252 m_files.erase(m_files.begin()+i); 253 } 254 } 255 } 256 257 void 258 XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie) 259 { 260 const size_t I = m_files.size(); 261 for (size_t i=0; i<I; i++) { 262 File& file = m_files[i]; 263 264 const size_t J = file.transUnits.size(); 265 for (size_t j=0; j<J; j++) { 266 func(file.filename, &(file.transUnits[j]), cookie); 267 } 268 } 269 } 270 271 TransUnit* 272 XLIFFFile::EditTransUnit(const string& filename, const string& id) 273 { 274 const size_t I = m_files.size(); 275 for (size_t ix=0, i=I-1; ix<I; ix++, i--) { 276 File& file = m_files[i]; 277 if (file.filename == filename) { 278 const size_t J = file.transUnits.size(); 279 for (size_t jx=0, j=J-1; jx<J; jx++, j--) { 280 TransUnit& tu = file.transUnits[j]; 281 if (tu.id == id) { 282 return &tu; 283 } 284 } 285 } 286 } 287 return NULL; 288 } 289 290 StringResource* 291 XLIFFFile::find_string_res(TransUnit& g, const StringResource& str) 292 { 293 int index; 294 if (str.version == CURRENT_VERSION) { 295 index = 0; 296 } 297 else if (str.version == OLD_VERSION) { 298 index = 2; 299 } 300 else { 301 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__); 302 return NULL; 303 } 304 if (str.config == m_sourceConfig) { 305 // index += 0; 306 } 307 else if (str.config == m_targetConfig) { 308 index += 1; 309 } 310 else { 311 str.pos.Error("unknown config for string %s: %s", str.id.c_str(), 312 str.config.ToString().c_str()); 313 return NULL; 314 } 315 switch (index) { 316 case 0: 317 return &g.source; 318 case 1: 319 return &g.target; 320 case 2: 321 return &g.altSource; 322 case 3: 323 return &g.altTarget; 324 } 325 str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__); 326 return NULL; 327 } 328 329 int 330 convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID) 331 { 332 int err = 0; 333 if (original->Type() == XMLNode::TEXT) { 334 addTo->EditChildren().push_back(original->Clone()); 335 return 0; 336 } else { 337 string ctype; 338 if (original->Namespace() == "") { 339 if (original->Name() == "b") { 340 ctype = "bold"; 341 } 342 else if (original->Name() == "i") { 343 ctype = "italic"; 344 } 345 else if (original->Name() == "u") { 346 ctype = "underline"; 347 } 348 } 349 if (ctype != "") { 350 vector<XMLAttribute> attrs; 351 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype)); 352 XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g", 353 attrs, XMLNode::EXACT); 354 355 const vector<XMLNode*>& children = original->Children(); 356 size_t I = children.size(); 357 for (size_t i=0; i<I; i++) { 358 err |= convert_html_to_xliff(children[i], name, copy, phID); 359 } 360 return err; 361 } 362 else { 363 if (original->Namespace() == XLIFF_XMLNS) { 364 addTo->EditChildren().push_back(original->Clone()); 365 return 0; 366 } else { 367 if (original->Namespace() == "") { 368 // flatten out the tag into ph tags -- but only if there is no namespace 369 // that's still unsupported because propagating the xmlns attribute is hard. 370 vector<XMLAttribute> attrs; 371 char idStr[30]; 372 (*phID)++; 373 sprintf(idStr, "id-%d", *phID); 374 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr)); 375 376 if (original->Children().size() == 0) { 377 XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, 378 "ph", attrs, XMLNode::EXACT); 379 ph->EditChildren().push_back( 380 XMLNode::NewText(original->Position(), 381 original->ToString(XLIFF_NAMESPACES), 382 XMLNode::EXACT)); 383 addTo->EditChildren().push_back(ph); 384 } else { 385 XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, 386 "bpt", attrs, XMLNode::EXACT); 387 begin->EditChildren().push_back( 388 XMLNode::NewText(original->Position(), 389 original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT), 390 XMLNode::EXACT)); 391 XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, 392 "ept", attrs, XMLNode::EXACT); 393 string endText = "</"; 394 endText += original->Name(); 395 endText += ">"; 396 end->EditChildren().push_back(XMLNode::NewText(original->Position(), 397 endText, XMLNode::EXACT)); 398 399 addTo->EditChildren().push_back(begin); 400 401 const vector<XMLNode*>& children = original->Children(); 402 size_t I = children.size(); 403 for (size_t i=0; i<I; i++) { 404 err |= convert_html_to_xliff(children[i], name, addTo, phID); 405 } 406 407 addTo->EditChildren().push_back(end); 408 } 409 return err; 410 } else { 411 original->Position().Error("invalid <%s> element in <%s> tag\n", 412 original->Name().c_str(), name.c_str()); 413 return 1; 414 } 415 } 416 } 417 } 418 } 419 420 XMLNode* 421 create_string_node(const StringResource& str, const string& name) 422 { 423 vector<XMLAttribute> attrs; 424 attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve")); 425 XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT); 426 427 const vector<XMLNode*>& children = str.value->Children(); 428 size_t I = children.size(); 429 int err = 0; 430 for (size_t i=0; i<I; i++) { 431 int phID = 0; 432 err |= convert_html_to_xliff(children[i], name, node, &phID); 433 } 434 435 if (err != 0) { 436 delete node; 437 } 438 return node; 439 } 440 441 static bool 442 compare_id(const TransUnit& lhs, const TransUnit& rhs) 443 { 444 string lid, rid; 445 int lindex, rindex; 446 StringResource::ParseTypedID(lhs.id, &lid, &lindex); 447 StringResource::ParseTypedID(rhs.id, &rid, &rindex); 448 if (lid < rid) return true; 449 if (lid == rid && lindex < rindex) return true; 450 return false; 451 } 452 453 XMLNode* 454 XLIFFFile::ToXMLNode() const 455 { 456 XMLNode* root; 457 size_t N; 458 459 // <xliff> 460 { 461 vector<XMLAttribute> attrs; 462 XLIFF_NAMESPACES.AddToAttributes(&attrs); 463 attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2")); 464 root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY); 465 } 466 467 vector<TransUnit> groups; 468 469 // <file> 470 vector<File> files = m_files; 471 sort(files.begin(), files.end()); 472 const size_t I = files.size(); 473 for (size_t i=0; i<I; i++) { 474 const File& file = files[i]; 475 476 vector<XMLAttribute> fileAttrs; 477 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res")); 478 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename)); 479 480 struct timeval tv; 481 struct timezone tz; 482 gettimeofday(&tv, &tz); 483 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec)))); 484 485 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale)); 486 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale)); 487 fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion)); 488 489 XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs, 490 XMLNode::PRETTY); 491 root->EditChildren().push_back(fileNode); 492 493 // <body> 494 XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body", 495 vector<XMLAttribute>(), XMLNode::PRETTY); 496 fileNode->EditChildren().push_back(bodyNode); 497 498 // <trans-unit> 499 vector<TransUnit> transUnits = file.transUnits; 500 sort(transUnits.begin(), transUnits.end(), compare_id); 501 const size_t J = transUnits.size(); 502 for (size_t j=0; j<J; j++) { 503 const TransUnit& transUnit = transUnits[j]; 504 505 vector<XMLAttribute> tuAttrs; 506 507 // strings start with string: 508 tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id)); 509 XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit", 510 tuAttrs, XMLNode::PRETTY); 511 bodyNode->EditChildren().push_back(transUnitNode); 512 513 // <extradata> 514 if (transUnit.source.comment != "") { 515 vector<XMLAttribute> extradataAttrs; 516 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata", 517 extradataAttrs, XMLNode::EXACT); 518 transUnitNode->EditChildren().push_back(extraNode); 519 extraNode->EditChildren().push_back( 520 XMLNode::NewText(GENERATED_POS, transUnit.source.comment, 521 XMLNode::PRETTY)); 522 } 523 524 // <source> 525 if (transUnit.source.id != "") { 526 transUnitNode->EditChildren().push_back( 527 create_string_node(transUnit.source, "source")); 528 } 529 530 // <target> 531 if (transUnit.target.id != "") { 532 transUnitNode->EditChildren().push_back( 533 create_string_node(transUnit.target, "target")); 534 } 535 536 // <alt-trans> 537 if (transUnit.altSource.id != "" || transUnit.altTarget.id != "" 538 || transUnit.rejectComment != "") { 539 vector<XMLAttribute> altTransAttrs; 540 XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans", 541 altTransAttrs, XMLNode::PRETTY); 542 transUnitNode->EditChildren().push_back(altTransNode); 543 544 // <extradata> 545 if (transUnit.rejectComment != "") { 546 vector<XMLAttribute> extradataAttrs; 547 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, 548 "extradata", extradataAttrs, 549 XMLNode::EXACT); 550 altTransNode->EditChildren().push_back(extraNode); 551 extraNode->EditChildren().push_back( 552 XMLNode::NewText(GENERATED_POS, transUnit.rejectComment, 553 XMLNode::PRETTY)); 554 } 555 556 // <source> 557 if (transUnit.altSource.id != "") { 558 altTransNode->EditChildren().push_back( 559 create_string_node(transUnit.altSource, "source")); 560 } 561 562 // <target> 563 if (transUnit.altTarget.id != "") { 564 altTransNode->EditChildren().push_back( 565 create_string_node(transUnit.altTarget, "target")); 566 } 567 } 568 569 } 570 } 571 572 return root; 573 } 574 575 576 string 577 XLIFFFile::ToString() const 578 { 579 XMLNode* xml = ToXMLNode(); 580 string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"; 581 s += xml->ToString(XLIFF_NAMESPACES); 582 delete xml; 583 s += '\n'; 584 return s; 585 } 586 587 Stats 588 XLIFFFile::GetStats(const string& config) const 589 { 590 Stats stat; 591 stat.config = config; 592 stat.files = m_files.size(); 593 stat.toBeTranslated = 0; 594 stat.noComments = 0; 595 596 for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) { 597 stat.toBeTranslated += file->transUnits.size(); 598 599 for (vector<TransUnit>::const_iterator tu=file->transUnits.begin(); 600 tu!=file->transUnits.end(); tu++) { 601 if (tu->source.comment == "") { 602 stat.noComments++; 603 } 604 } 605 } 606 607 stat.totalStrings = stat.toBeTranslated; 608 609 return stat; 610 } 611