Home | History | Annotate | Download | only in xml
      1 /*
      2  * Copyright 2005 Maksim Orlovich <maksim (at) kde.org>
      3  * Copyright (C) 2006 Apple Computer, Inc.
      4  * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org>
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  *
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "XPathParser.h"
     30 
     31 #if ENABLE(XPATH)
     32 
     33 #include "ExceptionCode.h"
     34 #include "StringHash.h"
     35 #include "XPathEvaluator.h"
     36 #include "XPathException.h"
     37 #include "XPathNSResolver.h"
     38 #include "XPathStep.h"
     39 #include <wtf/StdLibExtras.h>
     40 
     41 int xpathyyparse(void*);
     42 
     43 using namespace WTF;
     44 using namespace Unicode;
     45 
     46 namespace WebCore {
     47 namespace XPath {
     48 
     49 class LocationPath;
     50 
     51 #include "XPathGrammar.h"
     52 
     53 Parser* Parser::currentParser = 0;
     54 
     55 enum XMLCat { NameStart, NameCont, NotPartOfName };
     56 
     57 typedef HashMap<String, Step::Axis> AxisNamesMap;
     58 
     59 static XMLCat charCat(UChar aChar)
     60 {
     61     //### might need to add some special cases from the XML spec.
     62 
     63     if (aChar == '_')
     64         return NameStart;
     65 
     66     if (aChar == '.' || aChar == '-')
     67         return NameCont;
     68     CharCategory category = Unicode::category(aChar);
     69     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
     70         return NameStart;
     71     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
     72         return NameCont;
     73     return NotPartOfName;
     74 }
     75 
     76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
     77 {
     78     struct AxisName {
     79         const char* name;
     80         Step::Axis axis;
     81     };
     82     const AxisName axisNameList[] = {
     83         { "ancestor", Step::AncestorAxis },
     84         { "ancestor-or-self", Step::AncestorOrSelfAxis },
     85         { "attribute", Step::AttributeAxis },
     86         { "child", Step::ChildAxis },
     87         { "descendant", Step::DescendantAxis },
     88         { "descendant-or-self", Step::DescendantOrSelfAxis },
     89         { "following", Step::FollowingAxis },
     90         { "following-sibling", Step::FollowingSiblingAxis },
     91         { "namespace", Step::NamespaceAxis },
     92         { "parent", Step::ParentAxis },
     93         { "preceding", Step::PrecedingAxis },
     94         { "preceding-sibling", Step::PrecedingSiblingAxis },
     95         { "self", Step::SelfAxis }
     96     };
     97     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
     98         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
     99 }
    100 
    101 static bool isAxisName(const String& name, Step::Axis& type)
    102 {
    103     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
    104 
    105     if (axisNames.isEmpty())
    106         setUpAxisNamesMap(axisNames);
    107 
    108     AxisNamesMap::iterator it = axisNames.find(name);
    109     if (it == axisNames.end())
    110         return false;
    111     type = it->second;
    112     return true;
    113 }
    114 
    115 static bool isNodeTypeName(const String& name)
    116 {
    117     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
    118     if (nodeTypeNames.isEmpty()) {
    119         nodeTypeNames.add("comment");
    120         nodeTypeNames.add("text");
    121         nodeTypeNames.add("processing-instruction");
    122         nodeTypeNames.add("node");
    123     }
    124     return nodeTypeNames.contains(name);
    125 }
    126 
    127 /* Returns whether the last parsed token matches the [32] Operator rule
    128  * (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate
    129  * the tokens.
    130  */
    131 bool Parser::isOperatorContext() const
    132 {
    133     if (m_nextPos == 0)
    134         return false;
    135 
    136     switch (m_lastTokenType) {
    137         case AND: case OR: case MULOP:
    138         case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
    139         case EQOP: case RELOP:
    140         case '@': case AXISNAME:   case '(': case '[':
    141             return false;
    142         default:
    143             return true;
    144     }
    145 }
    146 
    147 void Parser::skipWS()
    148 {
    149     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
    150         ++m_nextPos;
    151 }
    152 
    153 Token Parser::makeTokenAndAdvance(int code, int advance)
    154 {
    155     m_nextPos += advance;
    156     return Token(code);
    157 }
    158 
    159 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
    160 {
    161     m_nextPos += advance;
    162     return Token(code, val);
    163 }
    164 
    165 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
    166 {
    167     m_nextPos += advance;
    168     return Token(code, val);
    169 }
    170 
    171 // Returns next char if it's there and interesting, 0 otherwise
    172 char Parser::peekAheadHelper()
    173 {
    174     if (m_nextPos + 1 >= m_data.length())
    175         return 0;
    176     UChar next = m_data[m_nextPos + 1];
    177     if (next >= 0xff)
    178         return 0;
    179     return next;
    180 }
    181 
    182 char Parser::peekCurHelper()
    183 {
    184     if (m_nextPos >= m_data.length())
    185         return 0;
    186     UChar next = m_data[m_nextPos];
    187     if (next >= 0xff)
    188         return 0;
    189     return next;
    190 }
    191 
    192 Token Parser::lexString()
    193 {
    194     UChar delimiter = m_data[m_nextPos];
    195     int startPos = m_nextPos + 1;
    196 
    197     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
    198         if (m_data[m_nextPos] == delimiter) {
    199             String value = m_data.substring(startPos, m_nextPos - startPos);
    200             if (value.isNull())
    201                 value = "";
    202             ++m_nextPos; // Consume the char.
    203             return Token(LITERAL, value);
    204         }
    205     }
    206 
    207     // Ouch, went off the end -- report error.
    208     return Token(XPATH_ERROR);
    209 }
    210 
    211 Token Parser::lexNumber()
    212 {
    213     int startPos = m_nextPos;
    214     bool seenDot = false;
    215 
    216     // Go until end or a non-digits character.
    217     for (; m_nextPos < m_data.length(); ++m_nextPos) {
    218         UChar aChar = m_data[m_nextPos];
    219         if (aChar >= 0xff) break;
    220 
    221         if (aChar < '0' || aChar > '9') {
    222             if (aChar == '.' && !seenDot)
    223                 seenDot = true;
    224             else
    225                 break;
    226         }
    227     }
    228 
    229     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
    230 }
    231 
    232 bool Parser::lexNCName(String& name)
    233 {
    234     int startPos = m_nextPos;
    235     if (m_nextPos >= m_data.length())
    236         return false;
    237 
    238     if (charCat(m_data[m_nextPos]) != NameStart)
    239         return false;
    240 
    241     // Keep going until we get a character that's not good for names.
    242     for (; m_nextPos < m_data.length(); ++m_nextPos)
    243         if (charCat(m_data[m_nextPos]) == NotPartOfName)
    244             break;
    245 
    246     name = m_data.substring(startPos, m_nextPos - startPos);
    247     return true;
    248 }
    249 
    250 bool Parser::lexQName(String& name)
    251 {
    252     String n1;
    253     if (!lexNCName(n1))
    254         return false;
    255 
    256     skipWS();
    257 
    258     // If the next character is :, what we just got it the prefix, if not,
    259     // it's the whole thing.
    260     if (peekAheadHelper() != ':') {
    261         name = n1;
    262         return true;
    263     }
    264 
    265     String n2;
    266     if (!lexNCName(n2))
    267         return false;
    268 
    269     name = n1 + ":" + n2;
    270     return true;
    271 }
    272 
    273 Token Parser::nextTokenInternal()
    274 {
    275     skipWS();
    276 
    277     if (m_nextPos >= m_data.length())
    278         return Token(0);
    279 
    280     char code = peekCurHelper();
    281     switch (code) {
    282         case '(': case ')': case '[': case ']':
    283         case '@': case ',': case '|':
    284             return makeTokenAndAdvance(code);
    285         case '\'':
    286         case '\"':
    287             return lexString();
    288         case '0': case '1': case '2': case '3': case '4':
    289         case '5': case '6': case '7': case '8': case '9':
    290             return lexNumber();
    291         case '.': {
    292             char next = peekAheadHelper();
    293             if (next == '.')
    294                 return makeTokenAndAdvance(DOTDOT, 2);
    295             if (next >= '0' && next <= '9')
    296                 return lexNumber();
    297             return makeTokenAndAdvance('.');
    298         }
    299         case '/':
    300             if (peekAheadHelper() == '/')
    301                 return makeTokenAndAdvance(SLASHSLASH, 2);
    302             return makeTokenAndAdvance('/');
    303         case '+':
    304             return makeTokenAndAdvance(PLUS);
    305         case '-':
    306             return makeTokenAndAdvance(MINUS);
    307         case '=':
    308             return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
    309         case '!':
    310             if (peekAheadHelper() == '=')
    311                 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
    312             return Token(XPATH_ERROR);
    313         case '<':
    314             if (peekAheadHelper() == '=')
    315                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
    316             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
    317         case '>':
    318             if (peekAheadHelper() == '=')
    319                 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
    320             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
    321         case '*':
    322             if (isOperatorContext())
    323                 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
    324             ++m_nextPos;
    325             return Token(NAMETEST, "*");
    326         case '$': { // $ QName
    327             m_nextPos++;
    328             String name;
    329             if (!lexQName(name))
    330                 return Token(XPATH_ERROR);
    331             return Token(VARIABLEREFERENCE, name);
    332         }
    333     }
    334 
    335     String name;
    336     if (!lexNCName(name))
    337         return Token(XPATH_ERROR);
    338 
    339     skipWS();
    340     // If we're in an operator context, check for any operator names
    341     if (isOperatorContext()) {
    342         if (name == "and") //### hash?
    343             return Token(AND);
    344         if (name == "or")
    345             return Token(OR);
    346         if (name == "mod")
    347             return Token(MULOP, NumericOp::OP_Mod);
    348         if (name == "div")
    349             return Token(MULOP, NumericOp::OP_Div);
    350     }
    351 
    352     // See whether we are at a :
    353     if (peekCurHelper() == ':') {
    354         m_nextPos++;
    355         // Any chance it's an axis name?
    356         if (peekCurHelper() == ':') {
    357             m_nextPos++;
    358 
    359             //It might be an axis name.
    360             Step::Axis axis;
    361             if (isAxisName(name, axis))
    362                 return Token(AXISNAME, axis);
    363             // Ugh, :: is only valid in axis names -> error
    364             return Token(XPATH_ERROR);
    365         }
    366 
    367         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
    368         skipWS();
    369         if (peekCurHelper() == '*') {
    370             m_nextPos++;
    371             return Token(NAMETEST, name + ":*");
    372         }
    373 
    374         // Make a full qname.
    375         String n2;
    376         if (!lexNCName(n2))
    377             return Token(XPATH_ERROR);
    378 
    379         name = name + ":" + n2;
    380     }
    381 
    382     skipWS();
    383     if (peekCurHelper() == '(') {
    384         //note: we don't swallow the (here!
    385 
    386         //either node type of function name
    387         if (isNodeTypeName(name)) {
    388             if (name == "processing-instruction")
    389                 return Token(PI, name);
    390 
    391             return Token(NODETYPE, name);
    392         }
    393         //must be a function name.
    394         return Token(FUNCTIONNAME, name);
    395     }
    396 
    397     // At this point, it must be NAMETEST.
    398     return Token(NAMETEST, name);
    399 }
    400 
    401 Token Parser::nextToken()
    402 {
    403     Token toRet = nextTokenInternal();
    404     m_lastTokenType = toRet.type;
    405     return toRet;
    406 }
    407 
    408 Parser::Parser()
    409 {
    410     reset(String());
    411 }
    412 
    413 void Parser::reset(const String& data)
    414 {
    415     m_nextPos = 0;
    416     m_data = data;
    417     m_lastTokenType = 0;
    418 
    419     m_topExpr = 0;
    420     m_gotNamespaceError = false;
    421 }
    422 
    423 int Parser::lex(void* data)
    424 {
    425     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
    426     Token tok = nextToken();
    427 
    428     switch (tok.type) {
    429         case AXISNAME:
    430             yylval->axis = tok.axis;
    431             break;
    432         case MULOP:
    433             yylval->numop = tok.numop;
    434             break;
    435         case RELOP:
    436         case EQOP:
    437             yylval->eqop = tok.eqop;
    438             break;
    439         case NODETYPE:
    440         case PI:
    441         case FUNCTIONNAME:
    442         case LITERAL:
    443         case VARIABLEREFERENCE:
    444         case NUMBER:
    445         case NAMETEST:
    446             yylval->str = new String(tok.str);
    447             registerString(yylval->str);
    448             break;
    449     }
    450 
    451     return tok.type;
    452 }
    453 
    454 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
    455 {
    456     int colon = qName.find(':');
    457     if (colon >= 0) {
    458         if (!m_resolver)
    459             return false;
    460         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
    461         if (namespaceURI.isNull())
    462             return false;
    463         localName = qName.substring(colon + 1);
    464     } else
    465         localName = qName;
    466 
    467     return true;
    468 }
    469 
    470 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
    471 {
    472     reset(statement);
    473 
    474     m_resolver = resolver;
    475 
    476     Parser* oldParser = currentParser;
    477     currentParser = this;
    478     int parseError = xpathyyparse(this);
    479     currentParser = oldParser;
    480 
    481     if (parseError) {
    482         deleteAllValues(m_parseNodes);
    483         m_parseNodes.clear();
    484 
    485         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
    486         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
    487             deleteAllValues(**it);
    488             delete *it;
    489         }
    490         m_predicateVectors.clear();
    491 
    492         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
    493         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
    494             deleteAllValues(**it);
    495             delete *it;
    496         }
    497         m_expressionVectors.clear();
    498 
    499         deleteAllValues(m_strings);
    500         m_strings.clear();
    501 
    502         deleteAllValues(m_nodeTests);
    503         m_nodeTests.clear();
    504 
    505         m_topExpr = 0;
    506 
    507         if (m_gotNamespaceError)
    508             ec = NAMESPACE_ERR;
    509         else
    510             ec = XPathException::INVALID_EXPRESSION_ERR;
    511         return 0;
    512     }
    513 
    514     ASSERT(m_parseNodes.size() == 1);
    515     ASSERT(*m_parseNodes.begin() == m_topExpr);
    516     ASSERT(m_expressionVectors.size() == 0);
    517     ASSERT(m_predicateVectors.size() == 0);
    518     ASSERT(m_strings.size() == 0);
    519     ASSERT(m_nodeTests.size() == 0);
    520 
    521     m_parseNodes.clear();
    522     Expression* result = m_topExpr;
    523     m_topExpr = 0;
    524 
    525     return result;
    526 }
    527 
    528 void Parser::registerParseNode(ParseNode* node)
    529 {
    530     if (node == 0)
    531         return;
    532 
    533     ASSERT(!m_parseNodes.contains(node));
    534 
    535     m_parseNodes.add(node);
    536 }
    537 
    538 void Parser::unregisterParseNode(ParseNode* node)
    539 {
    540     if (node == 0)
    541         return;
    542 
    543     ASSERT(m_parseNodes.contains(node));
    544 
    545     m_parseNodes.remove(node);
    546 }
    547 
    548 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
    549 {
    550     if (vector == 0)
    551         return;
    552 
    553     ASSERT(!m_predicateVectors.contains(vector));
    554 
    555     m_predicateVectors.add(vector);
    556 }
    557 
    558 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
    559 {
    560     if (vector == 0)
    561         return;
    562 
    563     ASSERT(m_predicateVectors.contains(vector));
    564 
    565     m_predicateVectors.remove(vector);
    566     delete vector;
    567 }
    568 
    569 
    570 void Parser::registerExpressionVector(Vector<Expression*>* vector)
    571 {
    572     if (vector == 0)
    573         return;
    574 
    575     ASSERT(!m_expressionVectors.contains(vector));
    576 
    577     m_expressionVectors.add(vector);
    578 }
    579 
    580 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
    581 {
    582     if (vector == 0)
    583         return;
    584 
    585     ASSERT(m_expressionVectors.contains(vector));
    586 
    587     m_expressionVectors.remove(vector);
    588     delete vector;
    589 }
    590 
    591 void Parser::registerString(String* s)
    592 {
    593     if (s == 0)
    594         return;
    595 
    596     ASSERT(!m_strings.contains(s));
    597 
    598     m_strings.add(s);
    599 }
    600 
    601 void Parser::deleteString(String* s)
    602 {
    603     if (s == 0)
    604         return;
    605 
    606     ASSERT(m_strings.contains(s));
    607 
    608     m_strings.remove(s);
    609     delete s;
    610 }
    611 
    612 void Parser::registerNodeTest(Step::NodeTest* t)
    613 {
    614     if (t == 0)
    615         return;
    616 
    617     ASSERT(!m_nodeTests.contains(t));
    618 
    619     m_nodeTests.add(t);
    620 }
    621 
    622 void Parser::deleteNodeTest(Step::NodeTest* t)
    623 {
    624     if (t == 0)
    625         return;
    626 
    627     ASSERT(m_nodeTests.contains(t));
    628 
    629     m_nodeTests.remove(t);
    630     delete t;
    631 }
    632 
    633 }
    634 }
    635 
    636 #endif // ENABLE(XPATH)
    637