Home | History | Annotate | Download | only in xml
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "XmlDom.h"
     18 #include "XmlPullParser.h"
     19 #include "util/Util.h"
     20 
     21 #include <cassert>
     22 #include <expat.h>
     23 #include <memory>
     24 #include <stack>
     25 #include <string>
     26 #include <tuple>
     27 
     28 namespace aapt {
     29 namespace xml {
     30 
     31 constexpr char kXmlNamespaceSep = 1;
     32 
     33 struct Stack {
     34     std::unique_ptr<xml::Node> root;
     35     std::stack<xml::Node*> nodeStack;
     36     std::u16string pendingComment;
     37 };
     38 
     39 /**
     40  * Extracts the namespace and name of an expanded element or attribute name.
     41  */
     42 static void splitName(const char* name, std::u16string* outNs, std::u16string* outName) {
     43     const char* p = name;
     44     while (*p != 0 && *p != kXmlNamespaceSep) {
     45         p++;
     46     }
     47 
     48     if (*p == 0) {
     49         outNs->clear();
     50         *outName = util::utf8ToUtf16(name);
     51     } else {
     52         *outNs = util::utf8ToUtf16(StringPiece(name, (p - name)));
     53         *outName = util::utf8ToUtf16(p + 1);
     54     }
     55 }
     56 
     57 static void addToStack(Stack* stack, XML_Parser parser, std::unique_ptr<Node> node) {
     58     node->lineNumber = XML_GetCurrentLineNumber(parser);
     59     node->columnNumber = XML_GetCurrentColumnNumber(parser);
     60 
     61     Node* thisNode = node.get();
     62     if (!stack->nodeStack.empty()) {
     63         stack->nodeStack.top()->addChild(std::move(node));
     64     } else {
     65         stack->root = std::move(node);
     66     }
     67 
     68     if (!nodeCast<Text>(thisNode)) {
     69         stack->nodeStack.push(thisNode);
     70     }
     71 }
     72 
     73 static void XMLCALL startNamespaceHandler(void* userData, const char* prefix, const char* uri) {
     74     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
     75     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
     76 
     77     std::unique_ptr<Namespace> ns = util::make_unique<Namespace>();
     78     if (prefix) {
     79         ns->namespacePrefix = util::utf8ToUtf16(prefix);
     80     }
     81 
     82     if (uri) {
     83         ns->namespaceUri = util::utf8ToUtf16(uri);
     84     }
     85 
     86     addToStack(stack, parser, std::move(ns));
     87 }
     88 
     89 static void XMLCALL endNamespaceHandler(void* userData, const char* prefix) {
     90     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
     91     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
     92 
     93     assert(!stack->nodeStack.empty());
     94     stack->nodeStack.pop();
     95 }
     96 
     97 static bool lessAttribute(const Attribute& lhs, const Attribute& rhs) {
     98     return std::tie(lhs.namespaceUri, lhs.name, lhs.value) <
     99             std::tie(rhs.namespaceUri, rhs.name, rhs.value);
    100 }
    101 
    102 static void XMLCALL startElementHandler(void* userData, const char* name, const char** attrs) {
    103     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
    104     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
    105 
    106     std::unique_ptr<Element> el = util::make_unique<Element>();
    107     splitName(name, &el->namespaceUri, &el->name);
    108 
    109     while (*attrs) {
    110         Attribute attribute;
    111         splitName(*attrs++, &attribute.namespaceUri, &attribute.name);
    112         attribute.value = util::utf8ToUtf16(*attrs++);
    113 
    114         // Insert in sorted order.
    115         auto iter = std::lower_bound(el->attributes.begin(), el->attributes.end(), attribute,
    116                                      lessAttribute);
    117         el->attributes.insert(iter, std::move(attribute));
    118     }
    119 
    120     el->comment = std::move(stack->pendingComment);
    121     addToStack(stack, parser, std::move(el));
    122 }
    123 
    124 static void XMLCALL endElementHandler(void* userData, const char* name) {
    125     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
    126     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
    127 
    128     assert(!stack->nodeStack.empty());
    129     //stack->nodeStack.top()->comment = std::move(stack->pendingComment);
    130     stack->nodeStack.pop();
    131 }
    132 
    133 static void XMLCALL characterDataHandler(void* userData, const char* s, int len) {
    134     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
    135     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
    136 
    137     if (!s || len <= 0) {
    138         return;
    139     }
    140 
    141     // See if we can just append the text to a previous text node.
    142     if (!stack->nodeStack.empty()) {
    143         Node* currentParent = stack->nodeStack.top();
    144         if (!currentParent->children.empty()) {
    145             Node* lastChild = currentParent->children.back().get();
    146             if (Text* text = nodeCast<Text>(lastChild)) {
    147                 text->text += util::utf8ToUtf16(StringPiece(s, len));
    148                 return;
    149             }
    150         }
    151     }
    152 
    153     std::unique_ptr<Text> text = util::make_unique<Text>();
    154     text->text = util::utf8ToUtf16(StringPiece(s, len));
    155     addToStack(stack, parser, std::move(text));
    156 }
    157 
    158 static void XMLCALL commentDataHandler(void* userData, const char* comment) {
    159     XML_Parser parser = reinterpret_cast<XML_Parser>(userData);
    160     Stack* stack = reinterpret_cast<Stack*>(XML_GetUserData(parser));
    161 
    162     if (!stack->pendingComment.empty()) {
    163         stack->pendingComment += '\n';
    164     }
    165     stack->pendingComment += util::utf8ToUtf16(comment);
    166 }
    167 
    168 std::unique_ptr<XmlResource> inflate(std::istream* in, IDiagnostics* diag, const Source& source) {
    169     Stack stack;
    170 
    171     XML_Parser parser = XML_ParserCreateNS(nullptr, kXmlNamespaceSep);
    172     XML_SetUserData(parser, &stack);
    173     XML_UseParserAsHandlerArg(parser);
    174     XML_SetElementHandler(parser, startElementHandler, endElementHandler);
    175     XML_SetNamespaceDeclHandler(parser, startNamespaceHandler, endNamespaceHandler);
    176     XML_SetCharacterDataHandler(parser, characterDataHandler);
    177     XML_SetCommentHandler(parser, commentDataHandler);
    178 
    179     char buffer[1024];
    180     while (!in->eof()) {
    181         in->read(buffer, sizeof(buffer) / sizeof(buffer[0]));
    182         if (in->bad() && !in->eof()) {
    183             stack.root = {};
    184             diag->error(DiagMessage(source) << strerror(errno));
    185             break;
    186         }
    187 
    188         if (XML_Parse(parser, buffer, in->gcount(), in->eof()) == XML_STATUS_ERROR) {
    189             stack.root = {};
    190             diag->error(DiagMessage(source.withLine(XML_GetCurrentLineNumber(parser)))
    191                         << XML_ErrorString(XML_GetErrorCode(parser)));
    192             break;
    193         }
    194     }
    195 
    196     XML_ParserFree(parser);
    197     if (stack.root) {
    198         return util::make_unique<XmlResource>(ResourceFile{ {}, {}, source }, std::move(stack.root));
    199     }
    200     return {};
    201 }
    202 
    203 static void copyAttributes(Element* el, android::ResXMLParser* parser) {
    204     const size_t attrCount = parser->getAttributeCount();
    205     if (attrCount > 0) {
    206         el->attributes.reserve(attrCount);
    207         for (size_t i = 0; i < attrCount; i++) {
    208             Attribute attr;
    209             size_t len;
    210             const char16_t* str16 = parser->getAttributeNamespace(i, &len);
    211             if (str16) {
    212                 attr.namespaceUri.assign(str16, len);
    213             }
    214 
    215             str16 = parser->getAttributeName(i, &len);
    216             if (str16) {
    217                 attr.name.assign(str16, len);
    218             }
    219 
    220             str16 = parser->getAttributeStringValue(i, &len);
    221             if (str16) {
    222                 attr.value.assign(str16, len);
    223             }
    224             el->attributes.push_back(std::move(attr));
    225         }
    226     }
    227 }
    228 
    229 std::unique_ptr<XmlResource> inflate(const void* data, size_t dataLen, IDiagnostics* diag,
    230                                      const Source& source) {
    231     // We import the android namespace because on Windows NO_ERROR is a macro, not an enum, which
    232     // causes errors when qualifying it with android::
    233     using namespace android;
    234 
    235     std::unique_ptr<Node> root;
    236     std::stack<Node*> nodeStack;
    237 
    238     ResXMLTree tree;
    239     if (tree.setTo(data, dataLen) != NO_ERROR) {
    240         return {};
    241     }
    242 
    243     ResXMLParser::event_code_t code;
    244     while ((code = tree.next()) != ResXMLParser::BAD_DOCUMENT &&
    245             code != ResXMLParser::END_DOCUMENT) {
    246         std::unique_ptr<Node> newNode;
    247         switch (code) {
    248             case ResXMLParser::START_NAMESPACE: {
    249                 std::unique_ptr<Namespace> node = util::make_unique<Namespace>();
    250                 size_t len;
    251                 const char16_t* str16 = tree.getNamespacePrefix(&len);
    252                 if (str16) {
    253                     node->namespacePrefix.assign(str16, len);
    254                 }
    255 
    256                 str16 = tree.getNamespaceUri(&len);
    257                 if (str16) {
    258                     node->namespaceUri.assign(str16, len);
    259                 }
    260                 newNode = std::move(node);
    261                 break;
    262             }
    263 
    264             case ResXMLParser::START_TAG: {
    265                 std::unique_ptr<Element> node = util::make_unique<Element>();
    266                 size_t len;
    267                 const char16_t* str16 = tree.getElementNamespace(&len);
    268                 if (str16) {
    269                     node->namespaceUri.assign(str16, len);
    270                 }
    271 
    272                 str16 = tree.getElementName(&len);
    273                 if (str16) {
    274                     node->name.assign(str16, len);
    275                 }
    276 
    277                 copyAttributes(node.get(), &tree);
    278 
    279                 newNode = std::move(node);
    280                 break;
    281             }
    282 
    283             case ResXMLParser::TEXT: {
    284                 std::unique_ptr<Text> node = util::make_unique<Text>();
    285                 size_t len;
    286                 const char16_t* str16 = tree.getText(&len);
    287                 if (str16) {
    288                     node->text.assign(str16, len);
    289                 }
    290                 newNode = std::move(node);
    291                 break;
    292             }
    293 
    294             case ResXMLParser::END_NAMESPACE:
    295             case ResXMLParser::END_TAG:
    296                 assert(!nodeStack.empty());
    297                 nodeStack.pop();
    298                 break;
    299 
    300             default:
    301                 assert(false);
    302                 break;
    303         }
    304 
    305         if (newNode) {
    306             newNode->lineNumber = tree.getLineNumber();
    307 
    308             Node* thisNode = newNode.get();
    309             if (!root) {
    310                 assert(nodeStack.empty());
    311                 root = std::move(newNode);
    312             } else {
    313                 assert(!nodeStack.empty());
    314                 nodeStack.top()->addChild(std::move(newNode));
    315             }
    316 
    317             if (!nodeCast<Text>(thisNode)) {
    318                 nodeStack.push(thisNode);
    319             }
    320         }
    321     }
    322     return util::make_unique<XmlResource>(ResourceFile{}, std::move(root));
    323 }
    324 
    325 Element* findRootElement(XmlResource* doc) {
    326     return findRootElement(doc->root.get());
    327 }
    328 
    329 Element* findRootElement(Node* node) {
    330     if (!node) {
    331         return nullptr;
    332     }
    333 
    334     Element* el = nullptr;
    335     while ((el = nodeCast<Element>(node)) == nullptr) {
    336         if (node->children.empty()) {
    337             return nullptr;
    338         }
    339         // We are looking for the first element, and namespaces can only have one child.
    340         node = node->children.front().get();
    341     }
    342     return el;
    343 }
    344 
    345 void Node::addChild(std::unique_ptr<Node> child) {
    346     child->parent = this;
    347     children.push_back(std::move(child));
    348 }
    349 
    350 Attribute* Element::findAttribute(const StringPiece16& ns, const StringPiece16& name) {
    351     for (auto& attr : attributes) {
    352         if (ns == attr.namespaceUri && name == attr.name) {
    353             return &attr;
    354         }
    355     }
    356     return nullptr;
    357 }
    358 
    359 Element* Element::findChild(const StringPiece16& ns, const StringPiece16& name) {
    360     return findChildWithAttribute(ns, name, {}, {}, {});
    361 }
    362 
    363 Element* Element::findChildWithAttribute(const StringPiece16& ns, const StringPiece16& name,
    364                                          const StringPiece16& attrNs, const StringPiece16& attrName,
    365                                          const StringPiece16& attrValue) {
    366     for (auto& childNode : children) {
    367         Node* child = childNode.get();
    368         while (nodeCast<Namespace>(child)) {
    369             if (child->children.empty()) {
    370                 break;
    371             }
    372             child = child->children[0].get();
    373         }
    374 
    375         if (Element* el = nodeCast<Element>(child)) {
    376             if (ns == el->namespaceUri && name == el->name) {
    377                 if (attrNs.empty() && attrName.empty()) {
    378                     return el;
    379                 }
    380 
    381                 Attribute* attr = el->findAttribute(attrNs, attrName);
    382                 if (attr && attrValue == attr->value) {
    383                     return el;
    384                 }
    385             }
    386         }
    387     }
    388     return nullptr;
    389 }
    390 
    391 std::vector<Element*> Element::getChildElements() {
    392     std::vector<Element*> elements;
    393     for (auto& childNode : children) {
    394         Node* child = childNode.get();
    395         while (nodeCast<Namespace>(child)) {
    396             if (child->children.empty()) {
    397                 break;
    398             }
    399             child = child->children[0].get();
    400         }
    401 
    402         if (Element* el = nodeCast<Element>(child)) {
    403             elements.push_back(el);
    404         }
    405     }
    406     return elements;
    407 }
    408 
    409 void PackageAwareVisitor::visit(Namespace* ns) {
    410    bool added = false;
    411    if (Maybe<ExtractedPackage> maybePackage = extractPackageFromNamespace(ns->namespaceUri)) {
    412        ExtractedPackage& package = maybePackage.value();
    413        mPackageDecls.push_back(PackageDecl{ ns->namespacePrefix, std::move(package) });
    414        added = true;
    415    }
    416 
    417    Visitor::visit(ns);
    418 
    419    if (added) {
    420        mPackageDecls.pop_back();
    421    }
    422 }
    423 
    424 Maybe<ExtractedPackage> PackageAwareVisitor::transformPackageAlias(
    425        const StringPiece16& alias, const StringPiece16& localPackage) const {
    426    if (alias.empty()) {
    427        return ExtractedPackage{ localPackage.toString(), false /* private */ };
    428    }
    429 
    430    const auto rend = mPackageDecls.rend();
    431    for (auto iter = mPackageDecls.rbegin(); iter != rend; ++iter) {
    432        if (alias == iter->prefix) {
    433            if (iter->package.package.empty()) {
    434                return ExtractedPackage{ localPackage.toString(),
    435                                               iter->package.privateNamespace };
    436            }
    437            return iter->package;
    438        }
    439    }
    440    return {};
    441 }
    442 
    443 } // namespace xml
    444 } // namespace aapt
    445