Home | History | Annotate | Download | only in localize
      1 #include "XMLHandler.h"
      2 
      3 #include <algorithm>
      4 #include <expat.h>
      5 #include <stdio.h>
      6 #include <string.h>
      7 #include <fcntl.h>
      8 #include <unistd.h>
      9 #include <errno.h>
     10 
// Character placed between a namespace and a local name in the names handed
// to the element callbacks; passed to XML_ParserCreateNS and searched for by
// parse_namespace.
#define NS_SEPARATOR 1
// One additional level of indentation used when serializing nodes.
#define MORE_INDENT "  "
     13 
     14 static string
     15 xml_text_escape(const string& s)
     16 {
     17     string result;
     18     const size_t N = s.length();
     19     for (size_t i=0; i<N; i++) {
     20         char c = s[i];
     21         switch (c) {
     22             case '<':
     23                 result += "&lt;";
     24                 break;
     25             case '>':
     26                 result += "&gt;";
     27                 break;
     28             case '&':
     29                 result += "&amp;";
     30                 break;
     31             default:
     32                 result += c;
     33                 break;
     34         }
     35     }
     36     return result;
     37 }
     38 
     39 static string
     40 xml_attr_escape(const string& s)
     41 {
     42     string result;
     43     const size_t N = s.length();
     44     for (size_t i=0; i<N; i++) {
     45         char c = s[i];
     46         switch (c) {
     47             case '\"':
     48                 result += "&quot;";
     49                 break;
     50             default:
     51                 result += c;
     52                 break;
     53         }
     54     }
     55     return result;
     56 }
     57 
     58 XMLNamespaceMap::XMLNamespaceMap()
     59 {
     60 }
     61 
     62 XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
     63 
     64 {
     65     while (*nspaces) {
     66         m_map[nspaces[1]] = nspaces[0];
     67         nspaces += 2;
     68     }
     69 }
     70 
     71 string
     72 XMLNamespaceMap::Get(const string& ns) const
     73 {
     74     if (ns == "xml") {
     75         return ns;
     76     }
     77     map<string,string>::const_iterator it = m_map.find(ns);
     78     if (it == m_map.end()) {
     79         return "";
     80     } else {
     81         return it->second;
     82     }
     83 }
     84 
     85 string
     86 XMLNamespaceMap::GetPrefix(const string& ns) const
     87 {
     88     if (ns == "") {
     89         return "";
     90     }
     91     map<string,string>::const_iterator it = m_map.find(ns);
     92     if (it != m_map.end()) {
     93         if (it->second == "") {
     94             return "";
     95         } else {
     96             return it->second + ":";
     97         }
     98     } else {
     99         return ":"; // invalid
    100     }
    101 }
    102 
    103 void
    104 XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
    105 {
    106     map<string,string>::const_iterator it;
    107     for (it=m_map.begin(); it!=m_map.end(); it++) {
    108         if (it->second == "xml") {
    109             continue;
    110         }
    111         XMLAttribute attr;
    112         if (it->second == "") {
    113             attr.name = "xmlns";
    114         } else {
    115             attr.name = "xmlns:";
    116             attr.name += it->second;
    117         }
    118         attr.value = it->first;
    119         attrs->push_back(attr);
    120     }
    121 }
    122 
    123 XMLAttribute::XMLAttribute()
    124 {
    125 }
    126 
    127 XMLAttribute::XMLAttribute(const XMLAttribute& that)
    128     :ns(that.ns),
    129      name(that.name),
    130      value(that.value)
    131 {
    132 }
    133 
    134 XMLAttribute::XMLAttribute(string n, string na, string v)
    135     :ns(n),
    136      name(na),
    137      value(v)
    138 {
    139 }
    140 
    141 XMLAttribute::~XMLAttribute()
    142 {
    143 }
    144 
    145 int
    146 XMLAttribute::Compare(const XMLAttribute& that) const
    147 {
    148     if (ns != that.ns) {
    149         return ns < that.ns ? -1 : 1;
    150     }
    151     if (name != that.name) {
    152         return name < that.name ? -1 : 1;
    153     }
    154     return 0;
    155 }
    156 
    157 string
    158 XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name,
    159                     const string& def)
    160 {
    161     const size_t N = list.size();
    162     for (size_t i=0; i<N; i++) {
    163         const XMLAttribute& attr = list[i];
    164         if (attr.ns == ns && attr.name == name) {
    165             return attr.value;
    166         }
    167     }
    168     return def;
    169 }
    170 
// State handed to the expat callbacks through the parser's user-data pointer.
struct xml_handler_data {
    // Handler stack; the last entry is the handler the callbacks dispatch to.
    vector<XMLHandler*> stack;
    // The parser in use; callbacks ask it for the current line number.
    XML_Parser parser;
    // NOTE(review): not referenced by any code visible in this part of the file.
    vector<vector<XMLAttribute>*> attributes;
    // File name used when building the SourcePos passed to handlers.
    string filename;
};
    177 
    178 XMLNode::XMLNode()
    179 {
    180 }
    181 
// Destructor. Child nodes are NOT deleted here: the deletion below is
// commented out, so destroying a node leaves the nodes in m_children alive.
// NOTE(review): presumably child nodes can be referenced from elsewhere --
// confirm ownership before re-enabling the deletion.
XMLNode::~XMLNode()
{
//    for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>);
}
    186 
    187 XMLNode*
    188 XMLNode::Clone() const
    189 {
    190     switch (m_type) {
    191         case ELEMENT: {
    192             XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty);
    193             const size_t N = m_children.size();
    194             for (size_t i=0; i<N; i++) {
    195                 e->m_children.push_back(m_children[i]->Clone());
    196             }
    197             return e;
    198         }
    199         case TEXT: {
    200             return XMLNode::NewText(m_pos, m_text, m_pretty);
    201         }
    202         default:
    203             return NULL;
    204     }
    205 }
    206 
    207 XMLNode*
    208 XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name,
    209                         const vector<XMLAttribute>& attrs, int pretty)
    210 {
    211     XMLNode* node = new XMLNode();
    212         node->m_type = ELEMENT;
    213         node->m_pretty = pretty;
    214         node->m_pos = pos;
    215         node->m_ns = ns;
    216         node->m_name = name;
    217         node->m_attrs = attrs;
    218     return node;
    219 }
    220 
    221 XMLNode*
    222 XMLNode::NewText(const SourcePos& pos, const string& text, int pretty)
    223 {
    224     XMLNode* node = new XMLNode();
    225         node->m_type = TEXT;
    226         node->m_pretty = pretty;
    227         node->m_pos = pos;
    228         node->m_text = text;
    229     return node;
    230 }
    231 
    232 void
    233 XMLNode::SetPrettyRecursive(int value)
    234 {
    235     m_pretty = value;
    236     const size_t N = m_children.size();
    237     for (size_t i=0; i<N; i++) {
    238         m_children[i]->SetPrettyRecursive(value);
    239     }
    240 }
    241 
    242 string
    243 XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const
    244 {
    245     return contents_to_string(nspaces, "");
    246 }
    247 
    248 string
    249 XMLNode::ToString(const XMLNamespaceMap& nspaces) const
    250 {
    251     return to_string(nspaces, "");
    252 }
    253 
    254 string
    255 XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const
    256 {
    257     return open_tag_to_string(nspaces, "", pretty);
    258 }
    259 
    260 string
    261 XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const
    262 {
    263     string result;
    264     const size_t N = m_children.size();
    265     for (size_t i=0; i<N; i++) {
    266         const XMLNode* child = m_children[i];
    267         switch (child->Type()) {
    268         case ELEMENT:
    269             if (m_pretty == PRETTY) {
    270                 result += '\n';
    271                 result += indent;
    272             }
    273         case TEXT:
    274             result += child->to_string(nspaces, indent);
    275             break;
    276         }
    277     }
    278     return result;
    279 }
    280 
    281 string
    282 trim_string(const string& str)
    283 {
    284     const char* p = str.c_str();
    285     while (*p && isspace(*p)) {
    286         p++;
    287     }
    288     const char* q = str.c_str() + str.length() - 1;
    289     while (q > p && isspace(*q)) {
    290         q--;
    291     }
    292     q++;
    293     return string(p, q-p);
    294 }
    295 
    296 string
    297 XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const
    298 {
    299     if (m_type != ELEMENT) {
    300         return "";
    301     }
    302     string result = "<";
    303     result += nspaces.GetPrefix(m_ns);
    304     result += m_name;
    305 
    306     vector<XMLAttribute> attrs = m_attrs;
    307 
    308     sort(attrs.begin(), attrs.end());
    309 
    310     const size_t N = attrs.size();
    311     for (size_t i=0; i<N; i++) {
    312         const XMLAttribute& attr = attrs[i];
    313         if (i == 0 || m_pretty == EXACT || pretty == EXACT) {
    314             result += ' ';
    315         }
    316         else {
    317             result += "\n";
    318             result += indent;
    319             result += MORE_INDENT;
    320             result += MORE_INDENT;
    321         }
    322         result += nspaces.GetPrefix(attr.ns);
    323         result += attr.name;
    324         result += "=\"";
    325         result += xml_attr_escape(attr.value);
    326         result += '\"';
    327     }
    328 
    329     if (m_children.size() > 0) {
    330         result += '>';
    331     } else {
    332         result += " />";
    333     }
    334     return result;
    335 }
    336 
    337 string
    338 XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const
    339 {
    340     switch (m_type)
    341     {
    342         case TEXT: {
    343             if (m_pretty == EXACT) {
    344                 return xml_text_escape(m_text);
    345             } else {
    346                 return xml_text_escape(trim_string(m_text));
    347             }
    348         }
    349         case ELEMENT: {
    350             string result = open_tag_to_string(nspaces, indent, PRETTY);
    351 
    352             if (m_children.size() > 0) {
    353                 result += contents_to_string(nspaces, indent + MORE_INDENT);
    354 
    355                 if (m_pretty == PRETTY && m_children.size() > 0) {
    356                     result += '\n';
    357                     result += indent;
    358                 }
    359 
    360                 result += "</";
    361                 result += nspaces.GetPrefix(m_ns);
    362                 result += m_name;
    363                 result += '>';
    364             }
    365             return result;
    366         }
    367         default:
    368             return "";
    369     }
    370 }
    371 
    372 string
    373 XMLNode::CollapseTextContents() const
    374 {
    375     if (m_type == TEXT) {
    376         return m_text;
    377     }
    378     else if (m_type == ELEMENT) {
    379         string result;
    380 
    381         const size_t N=m_children.size();
    382         for (size_t i=0; i<N; i++) {
    383             result += m_children[i]->CollapseTextContents();
    384         }
    385 
    386         return result;
    387     }
    388     else {
    389         return "";
    390     }
    391 }
    392 
    393 vector<XMLNode*>
    394 XMLNode::GetElementsByName(const string& ns, const string& name) const
    395 {
    396     vector<XMLNode*> result;
    397     const size_t N=m_children.size();
    398     for (size_t i=0; i<N; i++) {
    399         XMLNode* child = m_children[i];
    400         if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
    401             result.push_back(child);
    402         }
    403     }
    404     return result;
    405 }
    406 
    407 XMLNode*
    408 XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const
    409 {
    410     vector<XMLNode*> result;
    411     const size_t N=m_children.size();
    412     for (size_t i=0; i<N; i++) {
    413         XMLNode* child = m_children[i];
    414         if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
    415             if (index == 0) {
    416                 return child;
    417             } else {
    418                 index--;
    419             }
    420         }
    421     }
    422     return NULL;
    423 }
    424 
    425 size_t
    426 XMLNode::CountElementsByName(const string& ns, const string& name) const
    427 {
    428     size_t result = 0;
    429     const size_t N=m_children.size();
    430     for (size_t i=0; i<N; i++) {
    431         XMLNode* child = m_children[i];
    432         if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
    433             result++;
    434         }
    435     }
    436     return result;
    437 }
    438 
    439 string
    440 XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const
    441 {
    442     return XMLAttribute::Find(m_attrs, ns, name, def);
    443 }
    444 
    445 static void
    446 parse_namespace(const char* data, string* ns, string* name)
    447 {
    448     const char* p = strchr(data, NS_SEPARATOR);
    449     if (p != NULL) {
    450         ns->assign(data, p-data);
    451         name->assign(p+1);
    452     } else {
    453         ns->assign("");
    454         name->assign(data);
    455     }
    456 }
    457 
    458 static void
    459 convert_attrs(const char** in, vector<XMLAttribute>* out)
    460 {
    461     while (*in) {
    462         XMLAttribute attr;
    463         parse_namespace(in[0], &attr.ns, &attr.name);
    464         attr.value = in[1];
    465         out->push_back(attr);
    466         in += 2;
    467     }
    468 }
    469 
    470 static bool
    471 list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler)
    472 {
    473     const size_t N = stack.size();
    474     for (size_t i=0; i<N; i++) {
    475         if (stack[i] == handler) {
    476             return true;
    477         }
    478     }
    479     return false;
    480 }
    481 
    482 static void XMLCALL
    483 start_element_handler(void *userData, const char *name, const char **attrs)
    484 {
    485     xml_handler_data* data = (xml_handler_data*)userData;
    486 
    487     XMLHandler* handler = data->stack[data->stack.size()-1];
    488 
    489     SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
    490     string nsString;
    491     string nameString;
    492     XMLHandler* next = handler;
    493     vector<XMLAttribute> attributes;
    494 
    495     parse_namespace(name, &nsString, &nameString);
    496     convert_attrs(attrs, &attributes);
    497 
    498     handler->OnStartElement(pos, nsString, nameString, attributes, &next);
    499 
    500     if (next == NULL) {
    501         next = handler;
    502     }
    503 
    504     if (next != handler) {
    505         next->elementPos = pos;
    506         next->elementNamespace = nsString;
    507         next->elementName = nameString;
    508         next->elementAttributes = attributes;
    509     }
    510 
    511     data->stack.push_back(next);
    512 }
    513 
    514 static void XMLCALL
    515 end_element_handler(void *userData, const char *name)
    516 {
    517     xml_handler_data* data = (xml_handler_data*)userData;
    518 
    519     XMLHandler* handler = data->stack[data->stack.size()-1];
    520     data->stack.pop_back();
    521 
    522     SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
    523 
    524     if (!list_contains(data->stack, handler)) {
    525         handler->OnDone(pos);
    526         if (data->stack.size() > 1) {
    527             // not top one
    528             delete handler;
    529         }
    530     }
    531 
    532     handler = data->stack[data->stack.size()-1];
    533 
    534     string nsString;
    535     string nameString;
    536 
    537     parse_namespace(name, &nsString, &nameString);
    538 
    539     handler->OnEndElement(pos, nsString, nameString);
    540 }
    541 
    542 static void XMLCALL
    543 text_handler(void *userData, const XML_Char *s, int len)
    544 {
    545     xml_handler_data* data = (xml_handler_data*)userData;
    546     XMLHandler* handler = data->stack[data->stack.size()-1];
    547     SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
    548     handler->OnText(pos, string(s, len));
    549 }
    550 
    551 static void XMLCALL
    552 comment_handler(void *userData, const char *comment)
    553 {
    554     xml_handler_data* data = (xml_handler_data*)userData;
    555     XMLHandler* handler = data->stack[data->stack.size()-1];
    556     SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
    557     handler->OnComment(pos, string(comment));
    558 }
    559 
    560 bool
    561 XMLHandler::ParseFile(const string& filename, XMLHandler* handler)
    562 {
    563     char buf[16384];
    564     int fd = open(filename.c_str(), O_RDONLY);
    565     if (fd < 0) {
    566         SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno));
    567         return false;
    568     }
    569 
    570     XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
    571     xml_handler_data state;
    572     state.stack.push_back(handler);
    573     state.parser = parser;
    574     state.filename = filename;
    575 
    576     XML_SetUserData(parser, &state);
    577     XML_SetElementHandler(parser, start_element_handler, end_element_handler);
    578     XML_SetCharacterDataHandler(parser, text_handler);
    579     XML_SetCommentHandler(parser, comment_handler);
    580 
    581     ssize_t len;
    582     bool done;
    583     do {
    584         len = read(fd, buf, sizeof(buf));
    585         done = len < (ssize_t)sizeof(buf);
    586         if (len < 0) {
    587             SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno));
    588             close(fd);
    589             return false;
    590         }
    591         if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
    592             SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
    593                     "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
    594             close(fd);
    595             return false;
    596         }
    597     } while (!done);
    598 
    599     XML_ParserFree(parser);
    600 
    601     close(fd);
    602 
    603     return true;
    604 }
    605 
    606 bool
    607 XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler)
    608 {
    609     XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
    610     xml_handler_data state;
    611     state.stack.push_back(handler);
    612     state.parser = parser;
    613     state.filename = filename;
    614 
    615     XML_SetUserData(parser, &state);
    616     XML_SetElementHandler(parser, start_element_handler, end_element_handler);
    617     XML_SetCharacterDataHandler(parser, text_handler);
    618     XML_SetCommentHandler(parser, comment_handler);
    619 
    620     if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) {
    621         SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
    622                 "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
    623         return false;
    624     }
    625 
    626     XML_ParserFree(parser);
    627 
    628     return true;
    629 }
    630 
    631 XMLHandler::XMLHandler()
    632 {
    633 }
    634 
    635 XMLHandler::~XMLHandler()
    636 {
    637 }
    638 
    639 int
    640 XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
    641                             const vector<XMLAttribute>& attrs, XMLHandler** next)
    642 {
    643     return 0;
    644 }
    645 
    646 int
    647 XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
    648 {
    649     return 0;
    650 }
    651 
    652 int
    653 XMLHandler::OnText(const SourcePos& pos, const string& text)
    654 {
    655     return 0;
    656 }
    657 
    658 int
    659 XMLHandler::OnComment(const SourcePos& pos, const string& text)
    660 {
    661     return 0;
    662 }
    663 
    664 int
    665 XMLHandler::OnDone(const SourcePos& pos)
    666 {
    667     return 0;
    668 }
    669 
    670 TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next)
    671     :m_ns(ns),
    672      m_name(name),
    673      m_next(next)
    674 {
    675 }
    676 
    677 int
    678 TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
    679                             const vector<XMLAttribute>& attrs, XMLHandler** next)
    680 {
    681     *next = m_next;
    682     return 0;
    683 }
    684 
    685 int
    686 TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
    687 {
    688     return 0;
    689 }
    690 
    691 int
    692 TopElementHandler::OnText(const SourcePos& pos, const string& text)
    693 {
    694     return 0;
    695 }
    696 
    697 int
    698 TopElementHandler::OnDone(const SourcePos& pos)
    699 {
    700     return 0;
    701 }
    702 
    703 
    704 NodeHandler::NodeHandler(XMLNode* root, int pretty)
    705     :m_root(root),
    706      m_pretty(pretty)
    707 {
    708     if (root != NULL) {
    709         m_nodes.push_back(root);
    710     }
    711 }
    712 
    713 NodeHandler::~NodeHandler()
    714 {
    715 }
    716 
    717 int
    718 NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
    719                             const vector<XMLAttribute>& attrs, XMLHandler** next)
    720 {
    721     int pretty;
    722     if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") {
    723         pretty = XMLNode::EXACT;
    724     } else {
    725         if (m_root == NULL) {
    726             pretty = m_pretty;
    727         } else {
    728             pretty = m_nodes[m_nodes.size()-1]->Pretty();
    729         }
    730     }
    731     XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty);
    732     if (m_root == NULL) {
    733         m_root = n;
    734     } else {
    735         m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
    736     }
    737     m_nodes.push_back(n);
    738     return 0;
    739 }
    740 
    741 int
    742 NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
    743 {
    744     m_nodes.pop_back();
    745     return 0;
    746 }
    747 
    748 int
    749 NodeHandler::OnText(const SourcePos& pos, const string& text)
    750 {
    751     if (m_root == NULL) {
    752         return 1;
    753     }
    754     XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty());
    755     m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
    756     return 0;
    757 }
    758 
    759 int
    760 NodeHandler::OnComment(const SourcePos& pos, const string& text)
    761 {
    762     return 0;
    763 }
    764 
    765 int
    766 NodeHandler::OnDone(const SourcePos& pos)
    767 {
    768     return 0;
    769 }
    770 
    771 XMLNode*
    772 NodeHandler::ParseFile(const string& filename, int pretty)
    773 {
    774     NodeHandler handler(NULL, pretty);
    775     if (!XMLHandler::ParseFile(filename, &handler)) {
    776         fprintf(stderr, "error parsing file: %s\n", filename.c_str());
    777         return NULL;
    778     }
    779     return handler.Root();
    780 }
    781 
    782 XMLNode*
    783 NodeHandler::ParseString(const string& filename, const string& text, int pretty)
    784 {
    785     NodeHandler handler(NULL, pretty);
    786     if (!XMLHandler::ParseString(filename, text, &handler)) {
    787         fprintf(stderr, "error parsing file: %s\n", filename.c_str());
    788         return NULL;
    789     }
    790     return handler.Root();
    791 }
    792 
    793 
    794