Home | History | Annotate | Download | only in xml
      1 /*
      2  * Copyright 2005 Maksim Orlovich <maksim (at) kde.org>
      3  * Copyright (C) 2006 Apple Computer, Inc.
      4  * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org>
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  *
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/xml/XPathParser.h"
     30 
     31 #include "bindings/v8/ExceptionState.h"
     32 #include "core/XPathGrammar.h"
     33 #include "core/dom/ExceptionCode.h"
     34 #include "core/xml/XPathEvaluator.h"
     35 #include "core/xml/XPathNSResolver.h"
     36 #include "core/xml/XPathPath.h"
     37 #include "wtf/StdLibExtras.h"
     38 #include "wtf/text/StringHash.h"
     39 
     40 using namespace WebCore;
     41 using namespace WTF;
     42 using namespace Unicode;
     43 using namespace XPath;
     44 
     45 Parser* Parser::currentParser = 0;
     46 
     47 enum XMLCat { NameStart, NameCont, NotPartOfName };
     48 
     49 typedef HashMap<String, Step::Axis> AxisNamesMap;
     50 
     51 static XMLCat charCat(UChar aChar)
     52 {
     53     // might need to add some special cases from the XML spec.
     54 
     55     if (aChar == '_')
     56         return NameStart;
     57 
     58     if (aChar == '.' || aChar == '-')
     59         return NameCont;
     60     CharCategory category = Unicode::category(aChar);
     61     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
     62         return NameStart;
     63     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
     64         return NameCont;
     65     return NotPartOfName;
     66 }
     67 
     68 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
     69 {
     70     struct AxisName {
     71         const char* name;
     72         Step::Axis axis;
     73     };
     74     const AxisName axisNameList[] = {
     75         { "ancestor", Step::AncestorAxis },
     76         { "ancestor-or-self", Step::AncestorOrSelfAxis },
     77         { "attribute", Step::AttributeAxis },
     78         { "child", Step::ChildAxis },
     79         { "descendant", Step::DescendantAxis },
     80         { "descendant-or-self", Step::DescendantOrSelfAxis },
     81         { "following", Step::FollowingAxis },
     82         { "following-sibling", Step::FollowingSiblingAxis },
     83         { "namespace", Step::NamespaceAxis },
     84         { "parent", Step::ParentAxis },
     85         { "preceding", Step::PrecedingAxis },
     86         { "preceding-sibling", Step::PrecedingSiblingAxis },
     87         { "self", Step::SelfAxis }
     88     };
     89     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
     90         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
     91 }
     92 
     93 static bool isAxisName(const String& name, Step::Axis& type)
     94 {
     95     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
     96 
     97     if (axisNames.isEmpty())
     98         setUpAxisNamesMap(axisNames);
     99 
    100     AxisNamesMap::iterator it = axisNames.find(name);
    101     if (it == axisNames.end())
    102         return false;
    103     type = it->value;
    104     return true;
    105 }
    106 
    107 static bool isNodeTypeName(const String& name)
    108 {
    109     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
    110     if (nodeTypeNames.isEmpty()) {
    111         nodeTypeNames.add("comment");
    112         nodeTypeNames.add("text");
    113         nodeTypeNames.add("processing-instruction");
    114         nodeTypeNames.add("node");
    115     }
    116     return nodeTypeNames.contains(name);
    117 }
    118 
    119 // Returns whether the current token can possibly be a binary operator, given
    120 // the previous token. Necessary to disambiguate some of the operators
    121 // (* (multiply), div, and, or, mod) in the [32] Operator rule
    122 // (check http://www.w3.org/TR/xpath#exprlex).
    123 bool Parser::isBinaryOperatorContext() const
    124 {
    125     switch (m_lastTokenType) {
    126     case 0:
    127     case '@': case AXISNAME: case '(': case '[': case ',':
    128     case AND: case OR: case MULOP:
    129     case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
    130     case EQOP: case RELOP:
    131         return false;
    132     default:
    133         return true;
    134     }
    135 }
    136 
    137 void Parser::skipWS()
    138 {
    139     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
    140         ++m_nextPos;
    141 }
    142 
    143 Token Parser::makeTokenAndAdvance(int code, int advance)
    144 {
    145     m_nextPos += advance;
    146     return Token(code);
    147 }
    148 
    149 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
    150 {
    151     m_nextPos += advance;
    152     return Token(code, val);
    153 }
    154 
    155 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
    156 {
    157     m_nextPos += advance;
    158     return Token(code, val);
    159 }
    160 
    161 // Returns next char if it's there and interesting, 0 otherwise
    162 char Parser::peekAheadHelper()
    163 {
    164     if (m_nextPos + 1 >= m_data.length())
    165         return 0;
    166     UChar next = m_data[m_nextPos + 1];
    167     if (next >= 0xff)
    168         return 0;
    169     return next;
    170 }
    171 
    172 char Parser::peekCurHelper()
    173 {
    174     if (m_nextPos >= m_data.length())
    175         return 0;
    176     UChar next = m_data[m_nextPos];
    177     if (next >= 0xff)
    178         return 0;
    179     return next;
    180 }
    181 
    182 Token Parser::lexString()
    183 {
    184     UChar delimiter = m_data[m_nextPos];
    185     int startPos = m_nextPos + 1;
    186 
    187     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
    188         if (m_data[m_nextPos] == delimiter) {
    189             String value = m_data.substring(startPos, m_nextPos - startPos);
    190             if (value.isNull())
    191                 value = "";
    192             ++m_nextPos; // Consume the char.
    193             return Token(LITERAL, value);
    194         }
    195     }
    196 
    197     // Ouch, went off the end -- report error.
    198     return Token(XPATH_ERROR);
    199 }
    200 
    201 Token Parser::lexNumber()
    202 {
    203     int startPos = m_nextPos;
    204     bool seenDot = false;
    205 
    206     // Go until end or a non-digits character.
    207     for (; m_nextPos < m_data.length(); ++m_nextPos) {
    208         UChar aChar = m_data[m_nextPos];
    209         if (aChar >= 0xff)
    210             break;
    211 
    212         if (aChar < '0' || aChar > '9') {
    213             if (aChar == '.' && !seenDot)
    214                 seenDot = true;
    215             else
    216                 break;
    217         }
    218     }
    219 
    220     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
    221 }
    222 
    223 bool Parser::lexNCName(String& name)
    224 {
    225     int startPos = m_nextPos;
    226     if (m_nextPos >= m_data.length())
    227         return false;
    228 
    229     if (charCat(m_data[m_nextPos]) != NameStart)
    230         return false;
    231 
    232     // Keep going until we get a character that's not good for names.
    233     for (; m_nextPos < m_data.length(); ++m_nextPos) {
    234         if (charCat(m_data[m_nextPos]) == NotPartOfName)
    235             break;
    236     }
    237 
    238     name = m_data.substring(startPos, m_nextPos - startPos);
    239     return true;
    240 }
    241 
    242 bool Parser::lexQName(String& name)
    243 {
    244     String n1;
    245     if (!lexNCName(n1))
    246         return false;
    247 
    248     skipWS();
    249 
    250     // If the next character is :, what we just got it the prefix, if not,
    251     // it's the whole thing.
    252     if (peekAheadHelper() != ':') {
    253         name = n1;
    254         return true;
    255     }
    256 
    257     String n2;
    258     if (!lexNCName(n2))
    259         return false;
    260 
    261     name = n1 + ":" + n2;
    262     return true;
    263 }
    264 
    265 Token Parser::nextTokenInternal()
    266 {
    267     skipWS();
    268 
    269     if (m_nextPos >= m_data.length())
    270         return Token(0);
    271 
    272     char code = peekCurHelper();
    273     switch (code) {
    274     case '(': case ')': case '[': case ']':
    275     case '@': case ',': case '|':
    276         return makeTokenAndAdvance(code);
    277     case '\'':
    278     case '\"':
    279         return lexString();
    280     case '0': case '1': case '2': case '3': case '4':
    281     case '5': case '6': case '7': case '8': case '9':
    282         return lexNumber();
    283     case '.': {
    284         char next = peekAheadHelper();
    285         if (next == '.')
    286             return makeTokenAndAdvance(DOTDOT, 2);
    287         if (next >= '0' && next <= '9')
    288             return lexNumber();
    289         return makeTokenAndAdvance('.');
    290     }
    291     case '/':
    292         if (peekAheadHelper() == '/')
    293             return makeTokenAndAdvance(SLASHSLASH, 2);
    294         return makeTokenAndAdvance('/');
    295     case '+':
    296         return makeTokenAndAdvance(PLUS);
    297     case '-':
    298         return makeTokenAndAdvance(MINUS);
    299     case '=':
    300         return makeTokenAndAdvance(EQOP, EqTestOp::OpcodeEqual);
    301     case '!':
    302         if (peekAheadHelper() == '=')
    303             return makeTokenAndAdvance(EQOP, EqTestOp::OpcodeNotEqual, 2);
    304         return Token(XPATH_ERROR);
    305     case '<':
    306         if (peekAheadHelper() == '=')
    307             return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeLessOrEqual, 2);
    308         return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeLessThan);
    309     case '>':
    310         if (peekAheadHelper() == '=')
    311             return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeGreaterOrEqual, 2);
    312         return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeGreaterThan);
    313     case '*':
    314         if (isBinaryOperatorContext())
    315             return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
    316         ++m_nextPos;
    317         return Token(NAMETEST, "*");
    318     case '$': { // $ QName
    319         m_nextPos++;
    320         String name;
    321         if (!lexQName(name))
    322             return Token(XPATH_ERROR);
    323         return Token(VARIABLEREFERENCE, name);
    324     }
    325     }
    326 
    327     String name;
    328     if (!lexNCName(name))
    329         return Token(XPATH_ERROR);
    330 
    331     skipWS();
    332     // If we're in an operator context, check for any operator names
    333     if (isBinaryOperatorContext()) {
    334         if (name == "and") // ### hash?
    335             return Token(AND);
    336         if (name == "or")
    337             return Token(OR);
    338         if (name == "mod")
    339             return Token(MULOP, NumericOp::OP_Mod);
    340         if (name == "div")
    341             return Token(MULOP, NumericOp::OP_Div);
    342     }
    343 
    344     // See whether we are at a :
    345     if (peekCurHelper() == ':') {
    346         m_nextPos++;
    347         // Any chance it's an axis name?
    348         if (peekCurHelper() == ':') {
    349             m_nextPos++;
    350 
    351             // It might be an axis name.
    352             Step::Axis axis;
    353             if (isAxisName(name, axis))
    354                 return Token(AXISNAME, axis);
    355             // Ugh, :: is only valid in axis names -> error
    356             return Token(XPATH_ERROR);
    357         }
    358 
    359         // Seems like this is a fully qualified qname, or perhaps the * modified
    360         // one from NameTest
    361         skipWS();
    362         if (peekCurHelper() == '*') {
    363             m_nextPos++;
    364             return Token(NAMETEST, name + ":*");
    365         }
    366 
    367         // Make a full qname.
    368         String n2;
    369         if (!lexNCName(n2))
    370             return Token(XPATH_ERROR);
    371 
    372         name = name + ":" + n2;
    373     }
    374 
    375     skipWS();
    376     if (peekCurHelper() == '(') {
    377         // Note: we don't swallow the ( here!
    378 
    379         // Either node type of function name
    380         if (isNodeTypeName(name)) {
    381             if (name == "processing-instruction")
    382                 return Token(PI, name);
    383 
    384             return Token(NODETYPE, name);
    385         }
    386         // Must be a function name.
    387         return Token(FUNCTIONNAME, name);
    388     }
    389 
    390     // At this point, it must be NAMETEST.
    391     return Token(NAMETEST, name);
    392 }
    393 
    394 Token Parser::nextToken()
    395 {
    396     Token toRet = nextTokenInternal();
    397     m_lastTokenType = toRet.type;
    398     return toRet;
    399 }
    400 
    401 Parser::Parser()
    402 {
    403     reset(String());
    404 }
    405 
    406 Parser::~Parser()
    407 {
    408 }
    409 
    410 void Parser::reset(const String& data)
    411 {
    412     m_nextPos = 0;
    413     m_data = data;
    414     m_lastTokenType = 0;
    415 
    416     m_topExpr = nullptr;
    417     m_gotNamespaceError = false;
    418 }
    419 
    420 int Parser::lex(void* data)
    421 {
    422     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
    423     Token tok = nextToken();
    424 
    425     switch (tok.type) {
    426     case AXISNAME:
    427         yylval->axis = tok.axis;
    428         break;
    429     case MULOP:
    430         yylval->numop = tok.numop;
    431         break;
    432     case RELOP:
    433     case EQOP:
    434         yylval->eqop = tok.eqop;
    435         break;
    436     case NODETYPE:
    437     case PI:
    438     case FUNCTIONNAME:
    439     case LITERAL:
    440     case VARIABLEREFERENCE:
    441     case NUMBER:
    442     case NAMETEST:
    443         yylval->str = new String(tok.str);
    444         registerString(yylval->str);
    445         break;
    446     }
    447 
    448     return tok.type;
    449 }
    450 
    451 bool Parser::expandQName(const String& qName, AtomicString& localName, AtomicString& namespaceURI)
    452 {
    453     size_t colon = qName.find(':');
    454     if (colon != kNotFound) {
    455         if (!m_resolver)
    456             return false;
    457         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
    458         if (namespaceURI.isNull())
    459             return false;
    460         localName = AtomicString(qName.substring(colon + 1));
    461     } else {
    462         localName = AtomicString(qName);
    463     }
    464 
    465     return true;
    466 }
    467 
    468 PassOwnPtrWillBeRawPtr<Expression> Parser::parseStatement(const String& statement, PassRefPtrWillBeRawPtr<XPathNSResolver> resolver, ExceptionState& exceptionState)
    469 {
    470     reset(statement);
    471 
    472     m_resolver = resolver;
    473 
    474     Parser* oldParser = currentParser;
    475     currentParser = this;
    476     int parseError = xpathyyparse(this);
    477     currentParser = oldParser;
    478 
    479     if (parseError) {
    480 #if !ENABLE(OILPAN)
    481         while (!m_parseNodes.isEmpty())
    482             delete m_parseNodes.takeAny();
    483 
    484         HashSet<Vector<OwnPtr<Predicate> >*>::iterator pend = m_predicateVectors.end();
    485         for (HashSet<Vector<OwnPtr<Predicate> >*>::iterator it = m_predicateVectors.begin(); it != pend; ++it)
    486             delete *it;
    487         m_predicateVectors.clear();
    488 
    489         HashSet<Vector<OwnPtr<Expression> >*>::iterator eend = m_expressionVectors.end();
    490         for (HashSet<Vector<OwnPtr<Expression> >*>::iterator it = m_expressionVectors.begin(); it != eend; ++it)
    491             delete *it;
    492         m_expressionVectors.clear();
    493 
    494         m_nodeTests.clear();
    495 #endif
    496 
    497         m_strings.clear();
    498 
    499         m_topExpr = nullptr;
    500 
    501         if (m_gotNamespaceError)
    502             exceptionState.throwDOMException(NamespaceError, "The string '" + statement + "' contains unresolvable namespaces.");
    503         else
    504             exceptionState.throwDOMException(SyntaxError, "The string '" + statement + "' is not a valid XPath expression.");
    505         return nullptr;
    506     }
    507     ASSERT(m_strings.size() == 0);
    508 #if !ENABLE(OILPAN)
    509     ASSERT(m_parseNodes.size() == 1);
    510     ASSERT(*m_parseNodes.begin() == m_topExpr);
    511     ASSERT(m_expressionVectors.size() == 0);
    512     ASSERT(m_predicateVectors.size() == 0);
    513     ASSERT(m_nodeTests.size() == 0);
    514     m_parseNodes.clear();
    515 #endif
    516 
    517     Expression* result = m_topExpr;
    518     m_topExpr = nullptr;
    519 
    520     return adoptPtrWillBeNoop(result);
    521 }
    522 
    523 void Parser::registerParseNode(ParseNode* node)
    524 {
    525 #if !ENABLE(OILPAN)
    526     if (node == 0)
    527         return;
    528 
    529     ASSERT(!m_parseNodes.contains(node));
    530 
    531     m_parseNodes.add(node);
    532 #endif
    533 }
    534 
    535 void Parser::unregisterParseNode(ParseNode* node)
    536 {
    537 #if !ENABLE(OILPAN)
    538     if (node == 0)
    539         return;
    540 
    541     ASSERT(m_parseNodes.contains(node));
    542 
    543     m_parseNodes.remove(node);
    544 #endif
    545 }
    546 
    547 void Parser::registerPredicateVector(WillBeHeapVector<OwnPtrWillBeMember<Predicate> >* vector)
    548 {
    549 #if !ENABLE(OILPAN)
    550     if (vector == 0)
    551         return;
    552 
    553     ASSERT(!m_predicateVectors.contains(vector));
    554 
    555     m_predicateVectors.add(vector);
    556 #endif
    557 }
    558 
    559 void Parser::deletePredicateVector(WillBeHeapVector<OwnPtrWillBeMember<Predicate> >* vector)
    560 {
    561 #if !ENABLE(OILPAN)
    562     if (vector == 0)
    563         return;
    564 
    565     ASSERT(m_predicateVectors.contains(vector));
    566 
    567     m_predicateVectors.remove(vector);
    568     delete vector;
    569 #endif
    570 }
    571 
    572 
    573 void Parser::registerExpressionVector(WillBeHeapVector<OwnPtrWillBeMember<Expression> >* vector)
    574 {
    575 #if !ENABLE(OILPAN)
    576     if (vector == 0)
    577         return;
    578 
    579     ASSERT(!m_expressionVectors.contains(vector));
    580 
    581     m_expressionVectors.add(vector);
    582 #endif
    583 }
    584 
    585 void Parser::deleteExpressionVector(WillBeHeapVector<OwnPtrWillBeMember<Expression> >* vector)
    586 {
    587 #if !ENABLE(OILPAN)
    588     if (vector == 0)
    589         return;
    590 
    591     ASSERT(m_expressionVectors.contains(vector));
    592 
    593     m_expressionVectors.remove(vector);
    594     delete vector;
    595 #endif
    596 }
    597 
    598 void Parser::registerString(String* s)
    599 {
    600     if (s == 0)
    601         return;
    602 
    603     ASSERT(!m_strings.contains(s));
    604 
    605     m_strings.add(adoptPtr(s));
    606 }
    607 
    608 void Parser::deleteString(String* s)
    609 {
    610     if (s == 0)
    611         return;
    612 
    613     ASSERT(m_strings.contains(s));
    614 
    615     m_strings.remove(s);
    616 }
    617 
    618 void Parser::registerNodeTest(Step::NodeTest* t)
    619 {
    620 #if !ENABLE(OILPAN)
    621     if (t == 0)
    622         return;
    623 
    624     ASSERT(!m_nodeTests.contains(t));
    625 
    626     m_nodeTests.add(adoptPtr(t));
    627 #endif
    628 }
    629 
    630 void Parser::deleteNodeTest(Step::NodeTest* t)
    631 {
    632 #if !ENABLE(OILPAN)
    633     if (t == 0)
    634         return;
    635 
    636     ASSERT(m_nodeTests.contains(t));
    637 
    638     m_nodeTests.remove(t);
    639 #endif
    640 }
    641 
    642