Home | History | Annotate | Download | only in localize
      1 #include "XLIFFFile.h"
      2 
      3 #include <algorithm>
      4 #include <sys/time.h>
      5 #include <time.h>
      6 #include <cstdio>
      7 
      8 const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";
      9 
     10 const char *const NS_MAP[] = {
     11     "", XLIFF_XMLNS,
     12     "xml", XMLNS_XMLNS,
     13     NULL, NULL
     14 };
     15 
     16 const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);
     17 
     18 int
     19 XLIFFFile::File::Compare(const XLIFFFile::File& that) const
     20 {
     21     if (filename != that.filename) {
     22         return filename < that.filename ? -1 : 1;
     23     }
     24     return 0;
     25 }
     26 
     27 // =====================================================================================
     28 XLIFFFile::XLIFFFile()
     29 {
     30 }
     31 
     32 XLIFFFile::~XLIFFFile()
     33 {
     34 }
     35 
     36 static XMLNode*
     37 get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
     38 {
     39     size_t count = parent->CountElementsByName(ns, name);
     40     if (count == 1) {
     41         return parent->GetElementByNameAt(ns, name, 0);
     42     } else {
     43         if (required) {
     44             SourcePos pos = count == 0
     45                                 ? parent->Position()
     46                                 : parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position();
     47             pos.Error("<%s> elements must contain exactly one <%s> element",
     48                                 parent->Name().c_str(), name.c_str());
     49         }
     50         return NULL;
     51     }
     52 }
     53 
     54 XLIFFFile*
     55 XLIFFFile::Parse(const string& filename)
     56 {
     57     XLIFFFile* result = new XLIFFFile();
     58 
     59     XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
     60     if (root == NULL) {
     61         return NULL;
     62     }
     63 
     64     // <file>
     65     vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
     66     for (size_t i=0; i<files.size(); i++) {
     67         XMLNode* file = files[i];
     68 
     69         string datatype = file->GetAttribute("", "datatype", "");
     70         string originalFile = file->GetAttribute("", "original", "");
     71 
     72         Configuration sourceConfig;
     73         sourceConfig.locale = file->GetAttribute("", "source-language", "");
     74         result->m_sourceConfig = sourceConfig;
     75 
     76         Configuration targetConfig;
     77         targetConfig.locale = file->GetAttribute("", "target-language", "");
     78         result->m_targetConfig = targetConfig;
     79 
     80         result->m_currentVersion = file->GetAttribute("", "build-num", "");
     81         result->m_oldVersion = "old";
     82 
     83         // <body>
     84         XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
     85         if (body == NULL) continue;
     86 
     87         // <trans-unit>
     88         vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
     89         for (size_t j=0; j<transUnits.size(); j++) {
     90             XMLNode* transUnit = transUnits[j];
     91 
     92             string rawID = transUnit->GetAttribute("", "id", "");
     93             if (rawID == "") {
     94                 transUnit->Position().Error("<trans-unit> tag requires an id");
     95                 continue;
     96             }
     97             string id;
     98             int index;
     99 
    100             if (!StringResource::ParseTypedID(rawID, &id, &index)) {
    101                 transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
    102                 continue;
    103             }
    104 
    105             // <source>
    106             XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
    107             if (source != NULL) {
    108                 XMLNode* node = source->Clone();
    109                 node->SetPrettyRecursive(XMLNode::EXACT);
    110                 result->AddStringResource(StringResource(source->Position(), originalFile,
    111                             sourceConfig, id, index, node, CURRENT_VERSION,
    112                             result->m_currentVersion));
    113             }
    114 
    115             // <target>
    116             XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
    117             if (target != NULL) {
    118                 XMLNode* node = target->Clone();
    119                 node->SetPrettyRecursive(XMLNode::EXACT);
    120                 result->AddStringResource(StringResource(target->Position(), originalFile,
    121                             targetConfig, id, index, node, CURRENT_VERSION,
    122                             result->m_currentVersion));
    123             }
    124 
    125             // <alt-trans>
    126             XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
    127             if (altTrans != NULL) {
    128                 // <source>
    129                 XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
    130                 if (altSource != NULL) {
    131                     XMLNode* node = altSource->Clone();
    132                     node->SetPrettyRecursive(XMLNode::EXACT);
    133                     result->AddStringResource(StringResource(altSource->Position(),
    134                                 originalFile, sourceConfig, id, index, node, OLD_VERSION,
    135                                 result->m_oldVersion));
    136                 }
    137 
    138                 // <target>
    139                 XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
    140                 if (altTarget != NULL) {
    141                     XMLNode* node = altTarget->Clone();
    142                     node->SetPrettyRecursive(XMLNode::EXACT);
    143                     result->AddStringResource(StringResource(altTarget->Position(),
    144                                 originalFile, targetConfig, id, index, node, OLD_VERSION,
    145                                 result->m_oldVersion));
    146                 }
    147             }
    148         }
    149     }
    150     delete root;
    151     return result;
    152 }
    153 
    154 XLIFFFile*
    155 XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
    156                                 const string& currentVersion)
    157 {
    158     XLIFFFile* result = new XLIFFFile();
    159         result->m_sourceConfig = sourceConfig;
    160         result->m_targetConfig = targetConfig;
    161         result->m_currentVersion = currentVersion;
    162     return result;
    163 }
    164 
    165 set<string>
    166 XLIFFFile::Files() const
    167 {
    168     set<string> result;
    169     for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) {
    170         result.insert(f->filename);
    171     }
    172     return result;
    173 }
    174 
    175 void
    176 XLIFFFile::AddStringResource(const StringResource& str)
    177 {
    178     string id = str.TypedID();
    179 
    180     File* f = NULL;
    181     const size_t I = m_files.size();
    182     for (size_t i=0; i<I; i++) {
    183         if (m_files[i].filename == str.file) {
    184             f = &m_files[i];
    185             break;
    186         }
    187     }
    188     if (f == NULL) {
    189         File file;
    190         file.filename = str.file;
    191         m_files.push_back(file);
    192         f = &m_files[I];
    193     }
    194 
    195     const size_t J = f->transUnits.size();
    196     TransUnit* g = NULL;
    197     for (size_t j=0; j<J; j++) {
    198         if (f->transUnits[j].id == id) {
    199             g = &f->transUnits[j];
    200         }
    201     }
    202     if (g == NULL) {
    203         TransUnit group;
    204         group.id = id;
    205         f->transUnits.push_back(group);
    206         g = &f->transUnits[J];
    207     }
    208 
    209     StringResource* res = find_string_res(*g, str);
    210     if (res == NULL) {
    211         return ;
    212     }
    213     if (res->id != "") {
    214         str.pos.Error("Duplicate string resource: %s", res->id.c_str());
    215         res->pos.Error("Previous definition here");
    216         return ;
    217     }
    218     *res = str;
    219 
    220     m_strings.insert(str);
    221 }
    222 
    223 void
    224 XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
    225 {
    226     const size_t I = m_files.size();
    227     for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
    228         File& file = m_files[i];
    229 
    230         const size_t J = file.transUnits.size();
    231         for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
    232             TransUnit& tu = file.transUnits[j];
    233 
    234             bool keep = func(file.filename, tu, cookie);
    235             if (!keep) {
    236                 if (tu.source.id != "") {
    237                     m_strings.erase(tu.source);
    238                 }
    239                 if (tu.target.id != "") {
    240                     m_strings.erase(tu.target);
    241                 }
    242                 if (tu.altSource.id != "") {
    243                     m_strings.erase(tu.altSource);
    244                 }
    245                 if (tu.altTarget.id != "") {
    246                     m_strings.erase(tu.altTarget);
    247                 }
    248                 file.transUnits.erase(file.transUnits.begin()+j);
    249             }
    250         }
    251         if (file.transUnits.size() == 0) {
    252             m_files.erase(m_files.begin()+i);
    253         }
    254     }
    255 }
    256 
    257 void
    258 XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
    259 {
    260     const size_t I = m_files.size();
    261     for (size_t i=0; i<I; i++) {
    262         File& file = m_files[i];
    263 
    264         const size_t J = file.transUnits.size();
    265         for (size_t j=0; j<J; j++) {
    266             func(file.filename, &(file.transUnits[j]), cookie);
    267         }
    268     }
    269 }
    270 
    271 TransUnit*
    272 XLIFFFile::EditTransUnit(const string& filename, const string& id)
    273 {
    274     const size_t I = m_files.size();
    275     for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
    276         File& file = m_files[i];
    277         if (file.filename == filename) {
    278             const size_t J = file.transUnits.size();
    279             for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
    280                 TransUnit& tu = file.transUnits[j];
    281                 if (tu.id == id) {
    282                     return &tu;
    283                 }
    284             }
    285         }
    286     }
    287     return NULL;
    288 }
    289 
    290 StringResource*
    291 XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
    292 {
    293     int index;
    294     if (str.version == CURRENT_VERSION) {
    295         index = 0;
    296     }
    297     else if (str.version == OLD_VERSION) {
    298         index = 2;
    299     }
    300     else {
    301         str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
    302         return NULL;
    303     }
    304     if (str.config == m_sourceConfig) {
    305         // index += 0;
    306     }
    307     else if (str.config == m_targetConfig) {
    308         index += 1;
    309     }
    310     else {
    311         str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
    312                             str.config.ToString().c_str());
    313         return NULL;
    314     }
    315     switch (index) {
    316         case 0:
    317             return &g.source;
    318         case 1:
    319             return &g.target;
    320         case 2:
    321             return &g.altSource;
    322         case 3:
    323             return &g.altTarget;
    324     }
    325     str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
    326     return NULL;
    327 }
    328 
    329 int
    330 convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
    331 {
    332     int err = 0;
    333     if (original->Type() == XMLNode::TEXT) {
    334         addTo->EditChildren().push_back(original->Clone());
    335         return 0;
    336     } else {
    337         string ctype;
    338         if (original->Namespace() == "") {
    339             if (original->Name() == "b") {
    340                 ctype = "bold";
    341             }
    342             else if (original->Name() == "i") {
    343                 ctype = "italic";
    344             }
    345             else if (original->Name() == "u") {
    346                 ctype = "underline";
    347             }
    348         }
    349         if (ctype != "") {
    350             vector<XMLAttribute> attrs;
    351             attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
    352             XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
    353                                                 attrs, XMLNode::EXACT);
    354 
    355             const vector<XMLNode*>& children = original->Children();
    356             size_t I = children.size();
    357             for (size_t i=0; i<I; i++) {
    358                 err |= convert_html_to_xliff(children[i], name, copy, phID);
    359             }
    360             return err;
    361         }
    362         else {
    363             if (original->Namespace() == XLIFF_XMLNS) {
    364                 addTo->EditChildren().push_back(original->Clone());
    365                 return 0;
    366             } else {
    367                 if (original->Namespace() == "") {
    368                     // flatten out the tag into ph tags -- but only if there is no namespace
    369                     // that's still unsupported because propagating the xmlns attribute is hard.
    370                     vector<XMLAttribute> attrs;
    371                     char idStr[30];
    372                     (*phID)++;
    373                     sprintf(idStr, "id-%d", *phID);
    374                     attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));
    375 
    376                     if (original->Children().size() == 0) {
    377                         XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
    378                                 "ph", attrs, XMLNode::EXACT);
    379                         ph->EditChildren().push_back(
    380                                 XMLNode::NewText(original->Position(),
    381                                     original->ToString(XLIFF_NAMESPACES),
    382                                     XMLNode::EXACT));
    383                         addTo->EditChildren().push_back(ph);
    384                     } else {
    385                         XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
    386                                 "bpt", attrs, XMLNode::EXACT);
    387                         begin->EditChildren().push_back(
    388                                 XMLNode::NewText(original->Position(),
    389                                     original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
    390                                     XMLNode::EXACT));
    391                         XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
    392                                 "ept", attrs, XMLNode::EXACT);
    393                         string endText = "</";
    394                             endText += original->Name();
    395                             endText += ">";
    396                         end->EditChildren().push_back(XMLNode::NewText(original->Position(),
    397                                 endText, XMLNode::EXACT));
    398 
    399                         addTo->EditChildren().push_back(begin);
    400 
    401                         const vector<XMLNode*>& children = original->Children();
    402                         size_t I = children.size();
    403                         for (size_t i=0; i<I; i++) {
    404                             err |= convert_html_to_xliff(children[i], name, addTo, phID);
    405                         }
    406 
    407                         addTo->EditChildren().push_back(end);
    408                     }
    409                     return err;
    410                 } else {
    411                     original->Position().Error("invalid <%s> element in <%s> tag\n",
    412                                                 original->Name().c_str(), name.c_str());
    413                     return 1;
    414                 }
    415             }
    416         }
    417     }
    418 }
    419 
    420 XMLNode*
    421 create_string_node(const StringResource& str, const string& name)
    422 {
    423     vector<XMLAttribute> attrs;
    424     attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
    425     XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);
    426 
    427     const vector<XMLNode*>& children = str.value->Children();
    428     size_t I = children.size();
    429     int err = 0;
    430     for (size_t i=0; i<I; i++) {
    431         int phID = 0;
    432         err |= convert_html_to_xliff(children[i], name, node, &phID);
    433     }
    434 
    435     if (err != 0) {
    436         delete node;
    437     }
    438     return node;
    439 }
    440 
    441 static bool
    442 compare_id(const TransUnit& lhs, const TransUnit& rhs)
    443 {
    444     string lid, rid;
    445     int lindex, rindex;
    446     StringResource::ParseTypedID(lhs.id, &lid, &lindex);
    447     StringResource::ParseTypedID(rhs.id, &rid, &rindex);
    448     if (lid < rid) return true;
    449     if (lid == rid && lindex < rindex) return true;
    450     return false;
    451 }
    452 
    453 XMLNode*
    454 XLIFFFile::ToXMLNode() const
    455 {
    456     XMLNode* root;
    457     size_t N;
    458 
    459     // <xliff>
    460     {
    461         vector<XMLAttribute> attrs;
    462         XLIFF_NAMESPACES.AddToAttributes(&attrs);
    463         attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
    464         root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
    465     }
    466 
    467     vector<TransUnit> groups;
    468 
    469     // <file>
    470     vector<File> files = m_files;
    471     sort(files.begin(), files.end());
    472     const size_t I = files.size();
    473     for (size_t i=0; i<I; i++) {
    474         const File& file = files[i];
    475 
    476         vector<XMLAttribute> fileAttrs;
    477         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
    478         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
    479 
    480         struct timeval tv;
    481         struct timezone tz;
    482         gettimeofday(&tv, &tz);
    483         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec))));
    484 
    485         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
    486         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
    487         fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));
    488 
    489         XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
    490                                                 XMLNode::PRETTY);
    491         root->EditChildren().push_back(fileNode);
    492 
    493         // <body>
    494         XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
    495                                                 vector<XMLAttribute>(), XMLNode::PRETTY);
    496         fileNode->EditChildren().push_back(bodyNode);
    497 
    498         // <trans-unit>
    499         vector<TransUnit> transUnits = file.transUnits;
    500         sort(transUnits.begin(), transUnits.end(), compare_id);
    501         const size_t J = transUnits.size();
    502         for (size_t j=0; j<J; j++) {
    503             const TransUnit& transUnit = transUnits[j];
    504 
    505             vector<XMLAttribute> tuAttrs;
    506 
    507             // strings start with string:
    508             tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
    509             XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
    510                                                          tuAttrs, XMLNode::PRETTY);
    511             bodyNode->EditChildren().push_back(transUnitNode);
    512 
    513             // <extradata>
    514             if (transUnit.source.comment != "") {
    515                 vector<XMLAttribute> extradataAttrs;
    516                 XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
    517                                                             extradataAttrs, XMLNode::EXACT);
    518                 transUnitNode->EditChildren().push_back(extraNode);
    519                 extraNode->EditChildren().push_back(
    520                         XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
    521                                          XMLNode::PRETTY));
    522             }
    523 
    524             // <source>
    525             if (transUnit.source.id != "") {
    526                 transUnitNode->EditChildren().push_back(
    527                                     create_string_node(transUnit.source, "source"));
    528             }
    529 
    530             // <target>
    531             if (transUnit.target.id != "") {
    532                 transUnitNode->EditChildren().push_back(
    533                                     create_string_node(transUnit.target, "target"));
    534             }
    535 
    536             // <alt-trans>
    537             if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
    538                     || transUnit.rejectComment != "") {
    539                 vector<XMLAttribute> altTransAttrs;
    540                 XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
    541                                                             altTransAttrs, XMLNode::PRETTY);
    542                 transUnitNode->EditChildren().push_back(altTransNode);
    543 
    544                 // <extradata>
    545                 if (transUnit.rejectComment != "") {
    546                     vector<XMLAttribute> extradataAttrs;
    547                     XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
    548                                                                 "extradata", extradataAttrs,
    549                                                                 XMLNode::EXACT);
    550                     altTransNode->EditChildren().push_back(extraNode);
    551                     extraNode->EditChildren().push_back(
    552                             XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
    553                                              XMLNode::PRETTY));
    554                 }
    555 
    556                 // <source>
    557                 if (transUnit.altSource.id != "") {
    558                     altTransNode->EditChildren().push_back(
    559                                         create_string_node(transUnit.altSource, "source"));
    560                 }
    561 
    562                 // <target>
    563                 if (transUnit.altTarget.id != "") {
    564                     altTransNode->EditChildren().push_back(
    565                                         create_string_node(transUnit.altTarget, "target"));
    566                 }
    567             }
    568 
    569         }
    570     }
    571 
    572     return root;
    573 }
    574 
    575 
    576 string
    577 XLIFFFile::ToString() const
    578 {
    579     XMLNode* xml = ToXMLNode();
    580     string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
    581     s += xml->ToString(XLIFF_NAMESPACES);
    582     delete xml;
    583     s += '\n';
    584     return s;
    585 }
    586 
    587 Stats
    588 XLIFFFile::GetStats(const string& config) const
    589 {
    590     Stats stat;
    591     stat.config = config;
    592     stat.files = m_files.size();
    593     stat.toBeTranslated = 0;
    594     stat.noComments = 0;
    595 
    596     for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) {
    597         stat.toBeTranslated += file->transUnits.size();
    598 
    599         for (vector<TransUnit>::const_iterator tu=file->transUnits.begin();
    600                     tu!=file->transUnits.end(); tu++) {
    601             if (tu->source.comment == "") {
    602                 stat.noComments++;
    603             }
    604         }
    605     }
    606 
    607     stat.totalStrings = stat.toBeTranslated;
    608 
    609     return stat;
    610 }
    611