Home | History | Annotate | Download | only in xml
      1 /*
      2  * Copyright 2005 Maksim Orlovich <maksim (at) kde.org>
      3  * Copyright (C) 2006 Apple Computer, Inc.
      4  * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org>
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  *
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/xml/XPathParser.h"
     30 
     31 #include "bindings/v8/ExceptionState.h"
     32 #include "core/dom/ExceptionCode.h"
     33 #include "core/xml/XPathEvaluator.h"
     34 #include "core/xml/XPathNSResolver.h"
     35 #include "core/xml/XPathPath.h"
     36 #include "wtf/StdLibExtras.h"
     37 #include "wtf/text/StringHash.h"
     38 
     39 using namespace WebCore;
     40 using namespace WTF;
     41 using namespace Unicode;
     42 using namespace XPath;
     43 
     44 extern int xpathyyparse(WebCore::XPath::Parser*);
     45 #include "XPathGrammar.h"
     46 
     47 Parser* Parser::currentParser = 0;
     48 
     49 enum XMLCat { NameStart, NameCont, NotPartOfName };
     50 
     51 typedef HashMap<String, Step::Axis> AxisNamesMap;
     52 
     53 static XMLCat charCat(UChar aChar)
     54 {
     55     //### might need to add some special cases from the XML spec.
     56 
     57     if (aChar == '_')
     58         return NameStart;
     59 
     60     if (aChar == '.' || aChar == '-')
     61         return NameCont;
     62     CharCategory category = Unicode::category(aChar);
     63     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
     64         return NameStart;
     65     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
     66         return NameCont;
     67     return NotPartOfName;
     68 }
     69 
     70 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
     71 {
     72     struct AxisName {
     73         const char* name;
     74         Step::Axis axis;
     75     };
     76     const AxisName axisNameList[] = {
     77         { "ancestor", Step::AncestorAxis },
     78         { "ancestor-or-self", Step::AncestorOrSelfAxis },
     79         { "attribute", Step::AttributeAxis },
     80         { "child", Step::ChildAxis },
     81         { "descendant", Step::DescendantAxis },
     82         { "descendant-or-self", Step::DescendantOrSelfAxis },
     83         { "following", Step::FollowingAxis },
     84         { "following-sibling", Step::FollowingSiblingAxis },
     85         { "namespace", Step::NamespaceAxis },
     86         { "parent", Step::ParentAxis },
     87         { "preceding", Step::PrecedingAxis },
     88         { "preceding-sibling", Step::PrecedingSiblingAxis },
     89         { "self", Step::SelfAxis }
     90     };
     91     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
     92         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
     93 }
     94 
     95 static bool isAxisName(const String& name, Step::Axis& type)
     96 {
     97     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
     98 
     99     if (axisNames.isEmpty())
    100         setUpAxisNamesMap(axisNames);
    101 
    102     AxisNamesMap::iterator it = axisNames.find(name);
    103     if (it == axisNames.end())
    104         return false;
    105     type = it->value;
    106     return true;
    107 }
    108 
    109 static bool isNodeTypeName(const String& name)
    110 {
    111     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
    112     if (nodeTypeNames.isEmpty()) {
    113         nodeTypeNames.add("comment");
    114         nodeTypeNames.add("text");
    115         nodeTypeNames.add("processing-instruction");
    116         nodeTypeNames.add("node");
    117     }
    118     return nodeTypeNames.contains(name);
    119 }
    120 
    121 // Returns whether the current token can possibly be a binary operator, given
    122 // the previous token. Necessary to disambiguate some of the operators
    123 // (* (multiply), div, and, or, mod) in the [32] Operator rule
    124 // (check http://www.w3.org/TR/xpath#exprlex).
    125 bool Parser::isBinaryOperatorContext() const
    126 {
    127     switch (m_lastTokenType) {
    128     case 0:
    129     case '@': case AXISNAME: case '(': case '[': case ',':
    130     case AND: case OR: case MULOP:
    131     case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
    132     case EQOP: case RELOP:
    133         return false;
    134     default:
    135         return true;
    136     }
    137 }
    138 
    139 void Parser::skipWS()
    140 {
    141     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
    142         ++m_nextPos;
    143 }
    144 
    145 Token Parser::makeTokenAndAdvance(int code, int advance)
    146 {
    147     m_nextPos += advance;
    148     return Token(code);
    149 }
    150 
    151 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
    152 {
    153     m_nextPos += advance;
    154     return Token(code, val);
    155 }
    156 
    157 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
    158 {
    159     m_nextPos += advance;
    160     return Token(code, val);
    161 }
    162 
    163 // Returns next char if it's there and interesting, 0 otherwise
    164 char Parser::peekAheadHelper()
    165 {
    166     if (m_nextPos + 1 >= m_data.length())
    167         return 0;
    168     UChar next = m_data[m_nextPos + 1];
    169     if (next >= 0xff)
    170         return 0;
    171     return next;
    172 }
    173 
    174 char Parser::peekCurHelper()
    175 {
    176     if (m_nextPos >= m_data.length())
    177         return 0;
    178     UChar next = m_data[m_nextPos];
    179     if (next >= 0xff)
    180         return 0;
    181     return next;
    182 }
    183 
    184 Token Parser::lexString()
    185 {
    186     UChar delimiter = m_data[m_nextPos];
    187     int startPos = m_nextPos + 1;
    188 
    189     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
    190         if (m_data[m_nextPos] == delimiter) {
    191             String value = m_data.substring(startPos, m_nextPos - startPos);
    192             if (value.isNull())
    193                 value = "";
    194             ++m_nextPos; // Consume the char.
    195             return Token(LITERAL, value);
    196         }
    197     }
    198 
    199     // Ouch, went off the end -- report error.
    200     return Token(XPATH_ERROR);
    201 }
    202 
    203 Token Parser::lexNumber()
    204 {
    205     int startPos = m_nextPos;
    206     bool seenDot = false;
    207 
    208     // Go until end or a non-digits character.
    209     for (; m_nextPos < m_data.length(); ++m_nextPos) {
    210         UChar aChar = m_data[m_nextPos];
    211         if (aChar >= 0xff) break;
    212 
    213         if (aChar < '0' || aChar > '9') {
    214             if (aChar == '.' && !seenDot)
    215                 seenDot = true;
    216             else
    217                 break;
    218         }
    219     }
    220 
    221     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
    222 }
    223 
    224 bool Parser::lexNCName(String& name)
    225 {
    226     int startPos = m_nextPos;
    227     if (m_nextPos >= m_data.length())
    228         return false;
    229 
    230     if (charCat(m_data[m_nextPos]) != NameStart)
    231         return false;
    232 
    233     // Keep going until we get a character that's not good for names.
    234     for (; m_nextPos < m_data.length(); ++m_nextPos)
    235         if (charCat(m_data[m_nextPos]) == NotPartOfName)
    236             break;
    237 
    238     name = m_data.substring(startPos, m_nextPos - startPos);
    239     return true;
    240 }
    241 
    242 bool Parser::lexQName(String& name)
    243 {
    244     String n1;
    245     if (!lexNCName(n1))
    246         return false;
    247 
    248     skipWS();
    249 
    250     // If the next character is :, what we just got it the prefix, if not,
    251     // it's the whole thing.
    252     if (peekAheadHelper() != ':') {
    253         name = n1;
    254         return true;
    255     }
    256 
    257     String n2;
    258     if (!lexNCName(n2))
    259         return false;
    260 
    261     name = n1 + ":" + n2;
    262     return true;
    263 }
    264 
    265 Token Parser::nextTokenInternal()
    266 {
    267     skipWS();
    268 
    269     if (m_nextPos >= m_data.length())
    270         return Token(0);
    271 
    272     char code = peekCurHelper();
    273     switch (code) {
    274     case '(': case ')': case '[': case ']':
    275     case '@': case ',': case '|':
    276         return makeTokenAndAdvance(code);
    277     case '\'':
    278     case '\"':
    279         return lexString();
    280     case '0': case '1': case '2': case '3': case '4':
    281     case '5': case '6': case '7': case '8': case '9':
    282         return lexNumber();
    283     case '.': {
    284         char next = peekAheadHelper();
    285         if (next == '.')
    286             return makeTokenAndAdvance(DOTDOT, 2);
    287         if (next >= '0' && next <= '9')
    288             return lexNumber();
    289         return makeTokenAndAdvance('.');
    290     }
    291     case '/':
    292         if (peekAheadHelper() == '/')
    293             return makeTokenAndAdvance(SLASHSLASH, 2);
    294         return makeTokenAndAdvance('/');
    295     case '+':
    296         return makeTokenAndAdvance(PLUS);
    297     case '-':
    298         return makeTokenAndAdvance(MINUS);
    299     case '=':
    300         return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
    301     case '!':
    302         if (peekAheadHelper() == '=')
    303             return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
    304         return Token(XPATH_ERROR);
    305     case '<':
    306         if (peekAheadHelper() == '=')
    307             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
    308         return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
    309     case '>':
    310         if (peekAheadHelper() == '=')
    311             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
    312         return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
    313     case '*':
    314         if (isBinaryOperatorContext())
    315             return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
    316         ++m_nextPos;
    317         return Token(NAMETEST, "*");
    318     case '$': { // $ QName
    319         m_nextPos++;
    320         String name;
    321         if (!lexQName(name))
    322             return Token(XPATH_ERROR);
    323         return Token(VARIABLEREFERENCE, name);
    324     }
    325     }
    326 
    327     String name;
    328     if (!lexNCName(name))
    329         return Token(XPATH_ERROR);
    330 
    331     skipWS();
    332     // If we're in an operator context, check for any operator names
    333     if (isBinaryOperatorContext()) {
    334         if (name == "and") //### hash?
    335             return Token(AND);
    336         if (name == "or")
    337             return Token(OR);
    338         if (name == "mod")
    339             return Token(MULOP, NumericOp::OP_Mod);
    340         if (name == "div")
    341             return Token(MULOP, NumericOp::OP_Div);
    342     }
    343 
    344     // See whether we are at a :
    345     if (peekCurHelper() == ':') {
    346         m_nextPos++;
    347         // Any chance it's an axis name?
    348         if (peekCurHelper() == ':') {
    349             m_nextPos++;
    350 
    351             //It might be an axis name.
    352             Step::Axis axis;
    353             if (isAxisName(name, axis))
    354                 return Token(AXISNAME, axis);
    355             // Ugh, :: is only valid in axis names -> error
    356             return Token(XPATH_ERROR);
    357         }
    358 
    359         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
    360         skipWS();
    361         if (peekCurHelper() == '*') {
    362             m_nextPos++;
    363             return Token(NAMETEST, name + ":*");
    364         }
    365 
    366         // Make a full qname.
    367         String n2;
    368         if (!lexNCName(n2))
    369             return Token(XPATH_ERROR);
    370 
    371         name = name + ":" + n2;
    372     }
    373 
    374     skipWS();
    375     if (peekCurHelper() == '(') {
    376         //note: we don't swallow the (here!
    377 
    378         //either node type of function name
    379         if (isNodeTypeName(name)) {
    380             if (name == "processing-instruction")
    381                 return Token(PI, name);
    382 
    383             return Token(NODETYPE, name);
    384         }
    385         //must be a function name.
    386         return Token(FUNCTIONNAME, name);
    387     }
    388 
    389     // At this point, it must be NAMETEST.
    390     return Token(NAMETEST, name);
    391 }
    392 
    393 Token Parser::nextToken()
    394 {
    395     Token toRet = nextTokenInternal();
    396     m_lastTokenType = toRet.type;
    397     return toRet;
    398 }
    399 
    400 Parser::Parser()
    401 {
    402     reset(String());
    403 }
    404 
    405 Parser::~Parser()
    406 {
    407 }
    408 
    409 void Parser::reset(const String& data)
    410 {
    411     m_nextPos = 0;
    412     m_data = data;
    413     m_lastTokenType = 0;
    414 
    415     m_topExpr = 0;
    416     m_gotNamespaceError = false;
    417 }
    418 
    419 int Parser::lex(void* data)
    420 {
    421     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
    422     Token tok = nextToken();
    423 
    424     switch (tok.type) {
    425     case AXISNAME:
    426         yylval->axis = tok.axis;
    427         break;
    428     case MULOP:
    429         yylval->numop = tok.numop;
    430         break;
    431     case RELOP:
    432     case EQOP:
    433         yylval->eqop = tok.eqop;
    434         break;
    435     case NODETYPE:
    436     case PI:
    437     case FUNCTIONNAME:
    438     case LITERAL:
    439     case VARIABLEREFERENCE:
    440     case NUMBER:
    441     case NAMETEST:
    442         yylval->str = new String(tok.str);
    443         registerString(yylval->str);
    444         break;
    445     }
    446 
    447     return tok.type;
    448 }
    449 
    450 bool Parser::expandQName(const String& qName, AtomicString& localName, AtomicString& namespaceURI)
    451 {
    452     size_t colon = qName.find(':');
    453     if (colon != kNotFound) {
    454         if (!m_resolver)
    455             return false;
    456         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
    457         if (namespaceURI.isNull())
    458             return false;
    459         localName = AtomicString(qName.substring(colon + 1));
    460     } else {
    461         localName = AtomicString(qName);
    462     }
    463 
    464     return true;
    465 }
    466 
    467 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionState& exceptionState)
    468 {
    469     reset(statement);
    470 
    471     m_resolver = resolver;
    472 
    473     Parser* oldParser = currentParser;
    474     currentParser = this;
    475     int parseError = xpathyyparse(this);
    476     currentParser = oldParser;
    477 
    478     if (parseError) {
    479         deleteAllValues(m_parseNodes);
    480         m_parseNodes.clear();
    481 
    482         HashSet<Vector<OwnPtr<Predicate> >*>::iterator pend = m_predicateVectors.end();
    483         for (HashSet<Vector<OwnPtr<Predicate> >*>::iterator it = m_predicateVectors.begin(); it != pend; ++it)
    484             delete *it;
    485         m_predicateVectors.clear();
    486 
    487         HashSet<Vector<OwnPtr<Expression> >*>::iterator eend = m_expressionVectors.end();
    488         for (HashSet<Vector<OwnPtr<Expression> >*>::iterator it = m_expressionVectors.begin(); it != eend; ++it)
    489             delete *it;
    490         m_expressionVectors.clear();
    491 
    492         deleteAllValues(m_strings);
    493         m_strings.clear();
    494 
    495         deleteAllValues(m_nodeTests);
    496         m_nodeTests.clear();
    497 
    498         m_topExpr = 0;
    499 
    500         if (m_gotNamespaceError)
    501             exceptionState.throwDOMException(NamespaceError, "The string '" + statement + "' contains unresolvable namespaces.");
    502         else
    503             exceptionState.throwDOMException(SyntaxError, "The string '" + statement + "' is not a valid XPath expression.");
    504         return 0;
    505     }
    506 
    507     ASSERT(m_parseNodes.size() == 1);
    508     ASSERT(*m_parseNodes.begin() == m_topExpr);
    509     ASSERT(m_expressionVectors.size() == 0);
    510     ASSERT(m_predicateVectors.size() == 0);
    511     ASSERT(m_strings.size() == 0);
    512     ASSERT(m_nodeTests.size() == 0);
    513 
    514     m_parseNodes.clear();
    515     Expression* result = m_topExpr;
    516     m_topExpr = 0;
    517 
    518     return result;
    519 }
    520 
    521 void Parser::registerParseNode(ParseNode* node)
    522 {
    523     if (node == 0)
    524         return;
    525 
    526     ASSERT(!m_parseNodes.contains(node));
    527 
    528     m_parseNodes.add(node);
    529 }
    530 
    531 void Parser::unregisterParseNode(ParseNode* node)
    532 {
    533     if (node == 0)
    534         return;
    535 
    536     ASSERT(m_parseNodes.contains(node));
    537 
    538     m_parseNodes.remove(node);
    539 }
    540 
    541 void Parser::registerPredicateVector(Vector<OwnPtr<Predicate> >* vector)
    542 {
    543     if (vector == 0)
    544         return;
    545 
    546     ASSERT(!m_predicateVectors.contains(vector));
    547 
    548     m_predicateVectors.add(vector);
    549 }
    550 
    551 void Parser::deletePredicateVector(Vector<OwnPtr<Predicate> >* vector)
    552 {
    553     if (vector == 0)
    554         return;
    555 
    556     ASSERT(m_predicateVectors.contains(vector));
    557 
    558     m_predicateVectors.remove(vector);
    559     delete vector;
    560 }
    561 
    562 
    563 void Parser::registerExpressionVector(Vector<OwnPtr<Expression> >* vector)
    564 {
    565     if (vector == 0)
    566         return;
    567 
    568     ASSERT(!m_expressionVectors.contains(vector));
    569 
    570     m_expressionVectors.add(vector);
    571 }
    572 
    573 void Parser::deleteExpressionVector(Vector<OwnPtr<Expression> >* vector)
    574 {
    575     if (vector == 0)
    576         return;
    577 
    578     ASSERT(m_expressionVectors.contains(vector));
    579 
    580     m_expressionVectors.remove(vector);
    581     delete vector;
    582 }
    583 
    584 void Parser::registerString(String* s)
    585 {
    586     if (s == 0)
    587         return;
    588 
    589     ASSERT(!m_strings.contains(s));
    590 
    591     m_strings.add(s);
    592 }
    593 
    594 void Parser::deleteString(String* s)
    595 {
    596     if (s == 0)
    597         return;
    598 
    599     ASSERT(m_strings.contains(s));
    600 
    601     m_strings.remove(s);
    602     delete s;
    603 }
    604 
    605 void Parser::registerNodeTest(Step::NodeTest* t)
    606 {
    607     if (t == 0)
    608         return;
    609 
    610     ASSERT(!m_nodeTests.contains(t));
    611 
    612     m_nodeTests.add(t);
    613 }
    614 
    615 void Parser::deleteNodeTest(Step::NodeTest* t)
    616 {
    617     if (t == 0)
    618         return;
    619 
    620     ASSERT(m_nodeTests.contains(t));
    621 
    622     m_nodeTests.remove(t);
    623     delete t;
    624 }
    625 
    626