Home | History | Annotate | Download | only in xml
      1 /*
      2  * Copyright 2005 Maksim Orlovich <maksim (at) kde.org>
      3  * Copyright (C) 2006 Apple Computer, Inc.
      4  * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org>
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  *
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "XPathParser.h"
     30 
     31 #if ENABLE(XPATH)
     32 
     33 #include "ExceptionCode.h"
     34 #include "XPathEvaluator.h"
     35 #include "XPathException.h"
     36 #include "XPathNSResolver.h"
     37 #include "XPathPath.h"
     38 #include "XPathStep.h"
     39 #include <wtf/StdLibExtras.h>
     40 #include <wtf/text/StringHash.h>
     41 
     42 using namespace WebCore;
     43 using namespace WTF;
     44 using namespace Unicode;
     45 using namespace XPath;
     46 
     47 extern int xpathyyparse(WebCore::XPath::Parser*);
     48 #include "XPathGrammar.h"
     49 
     50 Parser* Parser::currentParser = 0;
     51 
     52 enum XMLCat { NameStart, NameCont, NotPartOfName };
     53 
     54 typedef HashMap<String, Step::Axis> AxisNamesMap;
     55 
     56 static XMLCat charCat(UChar aChar)
     57 {
     58     //### might need to add some special cases from the XML spec.
     59 
     60     if (aChar == '_')
     61         return NameStart;
     62 
     63     if (aChar == '.' || aChar == '-')
     64         return NameCont;
     65     CharCategory category = Unicode::category(aChar);
     66     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
     67         return NameStart;
     68     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
     69         return NameCont;
     70     return NotPartOfName;
     71 }
     72 
     73 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
     74 {
     75     struct AxisName {
     76         const char* name;
     77         Step::Axis axis;
     78     };
     79     const AxisName axisNameList[] = {
     80         { "ancestor", Step::AncestorAxis },
     81         { "ancestor-or-self", Step::AncestorOrSelfAxis },
     82         { "attribute", Step::AttributeAxis },
     83         { "child", Step::ChildAxis },
     84         { "descendant", Step::DescendantAxis },
     85         { "descendant-or-self", Step::DescendantOrSelfAxis },
     86         { "following", Step::FollowingAxis },
     87         { "following-sibling", Step::FollowingSiblingAxis },
     88         { "namespace", Step::NamespaceAxis },
     89         { "parent", Step::ParentAxis },
     90         { "preceding", Step::PrecedingAxis },
     91         { "preceding-sibling", Step::PrecedingSiblingAxis },
     92         { "self", Step::SelfAxis }
     93     };
     94     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
     95         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
     96 }
     97 
     98 static bool isAxisName(const String& name, Step::Axis& type)
     99 {
    100     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
    101 
    102     if (axisNames.isEmpty())
    103         setUpAxisNamesMap(axisNames);
    104 
    105     AxisNamesMap::iterator it = axisNames.find(name);
    106     if (it == axisNames.end())
    107         return false;
    108     type = it->second;
    109     return true;
    110 }
    111 
    112 static bool isNodeTypeName(const String& name)
    113 {
    114     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
    115     if (nodeTypeNames.isEmpty()) {
    116         nodeTypeNames.add("comment");
    117         nodeTypeNames.add("text");
    118         nodeTypeNames.add("processing-instruction");
    119         nodeTypeNames.add("node");
    120     }
    121     return nodeTypeNames.contains(name);
    122 }
    123 
    124 // Returns whether the current token can possibly be a binary operator, given
    125 // the previous token. Necessary to disambiguate some of the operators
    126 // (* (multiply), div, and, or, mod) in the [32] Operator rule
    127 // (check http://www.w3.org/TR/xpath#exprlex).
    128 bool Parser::isBinaryOperatorContext() const
    129 {
    130     switch (m_lastTokenType) {
    131     case 0:
    132     case '@': case AXISNAME: case '(': case '[': case ',':
    133     case AND: case OR: case MULOP:
    134     case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
    135     case EQOP: case RELOP:
    136         return false;
    137     default:
    138         return true;
    139     }
    140 }
    141 
    142 void Parser::skipWS()
    143 {
    144     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
    145         ++m_nextPos;
    146 }
    147 
    148 Token Parser::makeTokenAndAdvance(int code, int advance)
    149 {
    150     m_nextPos += advance;
    151     return Token(code);
    152 }
    153 
    154 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
    155 {
    156     m_nextPos += advance;
    157     return Token(code, val);
    158 }
    159 
    160 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
    161 {
    162     m_nextPos += advance;
    163     return Token(code, val);
    164 }
    165 
    166 // Returns next char if it's there and interesting, 0 otherwise
    167 char Parser::peekAheadHelper()
    168 {
    169     if (m_nextPos + 1 >= m_data.length())
    170         return 0;
    171     UChar next = m_data[m_nextPos + 1];
    172     if (next >= 0xff)
    173         return 0;
    174     return next;
    175 }
    176 
    177 char Parser::peekCurHelper()
    178 {
    179     if (m_nextPos >= m_data.length())
    180         return 0;
    181     UChar next = m_data[m_nextPos];
    182     if (next >= 0xff)
    183         return 0;
    184     return next;
    185 }
    186 
    187 Token Parser::lexString()
    188 {
    189     UChar delimiter = m_data[m_nextPos];
    190     int startPos = m_nextPos + 1;
    191 
    192     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
    193         if (m_data[m_nextPos] == delimiter) {
    194             String value = m_data.substring(startPos, m_nextPos - startPos);
    195             if (value.isNull())
    196                 value = "";
    197             ++m_nextPos; // Consume the char.
    198             return Token(LITERAL, value);
    199         }
    200     }
    201 
    202     // Ouch, went off the end -- report error.
    203     return Token(XPATH_ERROR);
    204 }
    205 
    206 Token Parser::lexNumber()
    207 {
    208     int startPos = m_nextPos;
    209     bool seenDot = false;
    210 
    211     // Go until end or a non-digits character.
    212     for (; m_nextPos < m_data.length(); ++m_nextPos) {
    213         UChar aChar = m_data[m_nextPos];
    214         if (aChar >= 0xff) break;
    215 
    216         if (aChar < '0' || aChar > '9') {
    217             if (aChar == '.' && !seenDot)
    218                 seenDot = true;
    219             else
    220                 break;
    221         }
    222     }
    223 
    224     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
    225 }
    226 
    227 bool Parser::lexNCName(String& name)
    228 {
    229     int startPos = m_nextPos;
    230     if (m_nextPos >= m_data.length())
    231         return false;
    232 
    233     if (charCat(m_data[m_nextPos]) != NameStart)
    234         return false;
    235 
    236     // Keep going until we get a character that's not good for names.
    237     for (; m_nextPos < m_data.length(); ++m_nextPos)
    238         if (charCat(m_data[m_nextPos]) == NotPartOfName)
    239             break;
    240 
    241     name = m_data.substring(startPos, m_nextPos - startPos);
    242     return true;
    243 }
    244 
    245 bool Parser::lexQName(String& name)
    246 {
    247     String n1;
    248     if (!lexNCName(n1))
    249         return false;
    250 
    251     skipWS();
    252 
    253     // If the next character is :, what we just got it the prefix, if not,
    254     // it's the whole thing.
    255     if (peekAheadHelper() != ':') {
    256         name = n1;
    257         return true;
    258     }
    259 
    260     String n2;
    261     if (!lexNCName(n2))
    262         return false;
    263 
    264     name = n1 + ":" + n2;
    265     return true;
    266 }
    267 
    268 Token Parser::nextTokenInternal()
    269 {
    270     skipWS();
    271 
    272     if (m_nextPos >= m_data.length())
    273         return Token(0);
    274 
    275     char code = peekCurHelper();
    276     switch (code) {
    277     case '(': case ')': case '[': case ']':
    278     case '@': case ',': case '|':
    279         return makeTokenAndAdvance(code);
    280     case '\'':
    281     case '\"':
    282         return lexString();
    283     case '0': case '1': case '2': case '3': case '4':
    284     case '5': case '6': case '7': case '8': case '9':
    285         return lexNumber();
    286     case '.': {
    287         char next = peekAheadHelper();
    288         if (next == '.')
    289             return makeTokenAndAdvance(DOTDOT, 2);
    290         if (next >= '0' && next <= '9')
    291             return lexNumber();
    292         return makeTokenAndAdvance('.');
    293     }
    294     case '/':
    295         if (peekAheadHelper() == '/')
    296             return makeTokenAndAdvance(SLASHSLASH, 2);
    297         return makeTokenAndAdvance('/');
    298     case '+':
    299         return makeTokenAndAdvance(PLUS);
    300     case '-':
    301         return makeTokenAndAdvance(MINUS);
    302     case '=':
    303         return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
    304     case '!':
    305         if (peekAheadHelper() == '=')
    306             return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
    307         return Token(XPATH_ERROR);
    308     case '<':
    309         if (peekAheadHelper() == '=')
    310             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
    311         return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
    312     case '>':
    313         if (peekAheadHelper() == '=')
    314             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
    315         return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
    316     case '*':
    317         if (isBinaryOperatorContext())
    318             return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
    319         ++m_nextPos;
    320         return Token(NAMETEST, "*");
    321     case '$': { // $ QName
    322         m_nextPos++;
    323         String name;
    324         if (!lexQName(name))
    325             return Token(XPATH_ERROR);
    326         return Token(VARIABLEREFERENCE, name);
    327     }
    328     }
    329 
    330     String name;
    331     if (!lexNCName(name))
    332         return Token(XPATH_ERROR);
    333 
    334     skipWS();
    335     // If we're in an operator context, check for any operator names
    336     if (isBinaryOperatorContext()) {
    337         if (name == "and") //### hash?
    338             return Token(AND);
    339         if (name == "or")
    340             return Token(OR);
    341         if (name == "mod")
    342             return Token(MULOP, NumericOp::OP_Mod);
    343         if (name == "div")
    344             return Token(MULOP, NumericOp::OP_Div);
    345     }
    346 
    347     // See whether we are at a :
    348     if (peekCurHelper() == ':') {
    349         m_nextPos++;
    350         // Any chance it's an axis name?
    351         if (peekCurHelper() == ':') {
    352             m_nextPos++;
    353 
    354             //It might be an axis name.
    355             Step::Axis axis;
    356             if (isAxisName(name, axis))
    357                 return Token(AXISNAME, axis);
    358             // Ugh, :: is only valid in axis names -> error
    359             return Token(XPATH_ERROR);
    360         }
    361 
    362         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
    363         skipWS();
    364         if (peekCurHelper() == '*') {
    365             m_nextPos++;
    366             return Token(NAMETEST, name + ":*");
    367         }
    368 
    369         // Make a full qname.
    370         String n2;
    371         if (!lexNCName(n2))
    372             return Token(XPATH_ERROR);
    373 
    374         name = name + ":" + n2;
    375     }
    376 
    377     skipWS();
    378     if (peekCurHelper() == '(') {
    379         //note: we don't swallow the (here!
    380 
    381         //either node type of function name
    382         if (isNodeTypeName(name)) {
    383             if (name == "processing-instruction")
    384                 return Token(PI, name);
    385 
    386             return Token(NODETYPE, name);
    387         }
    388         //must be a function name.
    389         return Token(FUNCTIONNAME, name);
    390     }
    391 
    392     // At this point, it must be NAMETEST.
    393     return Token(NAMETEST, name);
    394 }
    395 
    396 Token Parser::nextToken()
    397 {
    398     Token toRet = nextTokenInternal();
    399     m_lastTokenType = toRet.type;
    400     return toRet;
    401 }
    402 
    403 Parser::Parser()
    404 {
    405     reset(String());
    406 }
    407 
    408 Parser::~Parser()
    409 {
    410 }
    411 
    412 void Parser::reset(const String& data)
    413 {
    414     m_nextPos = 0;
    415     m_data = data;
    416     m_lastTokenType = 0;
    417 
    418     m_topExpr = 0;
    419     m_gotNamespaceError = false;
    420 }
    421 
    422 int Parser::lex(void* data)
    423 {
    424     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
    425     Token tok = nextToken();
    426 
    427     switch (tok.type) {
    428     case AXISNAME:
    429         yylval->axis = tok.axis;
    430         break;
    431     case MULOP:
    432         yylval->numop = tok.numop;
    433         break;
    434     case RELOP:
    435     case EQOP:
    436         yylval->eqop = tok.eqop;
    437         break;
    438     case NODETYPE:
    439     case PI:
    440     case FUNCTIONNAME:
    441     case LITERAL:
    442     case VARIABLEREFERENCE:
    443     case NUMBER:
    444     case NAMETEST:
    445         yylval->str = new String(tok.str);
    446         registerString(yylval->str);
    447         break;
    448     }
    449 
    450     return tok.type;
    451 }
    452 
    453 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
    454 {
    455     size_t colon = qName.find(':');
    456     if (colon != notFound) {
    457         if (!m_resolver)
    458             return false;
    459         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
    460         if (namespaceURI.isNull())
    461             return false;
    462         localName = qName.substring(colon + 1);
    463     } else
    464         localName = qName;
    465 
    466     return true;
    467 }
    468 
    469 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
    470 {
    471     reset(statement);
    472 
    473     m_resolver = resolver;
    474 
    475     Parser* oldParser = currentParser;
    476     currentParser = this;
    477     int parseError = xpathyyparse(this);
    478     currentParser = oldParser;
    479 
    480     if (parseError) {
    481         deleteAllValues(m_parseNodes);
    482         m_parseNodes.clear();
    483 
    484         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
    485         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
    486             deleteAllValues(**it);
    487             delete *it;
    488         }
    489         m_predicateVectors.clear();
    490 
    491         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
    492         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
    493             deleteAllValues(**it);
    494             delete *it;
    495         }
    496         m_expressionVectors.clear();
    497 
    498         deleteAllValues(m_strings);
    499         m_strings.clear();
    500 
    501         deleteAllValues(m_nodeTests);
    502         m_nodeTests.clear();
    503 
    504         m_topExpr = 0;
    505 
    506         if (m_gotNamespaceError)
    507             ec = NAMESPACE_ERR;
    508         else
    509             ec = XPathException::INVALID_EXPRESSION_ERR;
    510         return 0;
    511     }
    512 
    513     ASSERT(m_parseNodes.size() == 1);
    514     ASSERT(*m_parseNodes.begin() == m_topExpr);
    515     ASSERT(m_expressionVectors.size() == 0);
    516     ASSERT(m_predicateVectors.size() == 0);
    517     ASSERT(m_strings.size() == 0);
    518     ASSERT(m_nodeTests.size() == 0);
    519 
    520     m_parseNodes.clear();
    521     Expression* result = m_topExpr;
    522     m_topExpr = 0;
    523 
    524     return result;
    525 }
    526 
    527 void Parser::registerParseNode(ParseNode* node)
    528 {
    529     if (node == 0)
    530         return;
    531 
    532     ASSERT(!m_parseNodes.contains(node));
    533 
    534     m_parseNodes.add(node);
    535 }
    536 
    537 void Parser::unregisterParseNode(ParseNode* node)
    538 {
    539     if (node == 0)
    540         return;
    541 
    542     ASSERT(m_parseNodes.contains(node));
    543 
    544     m_parseNodes.remove(node);
    545 }
    546 
    547 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
    548 {
    549     if (vector == 0)
    550         return;
    551 
    552     ASSERT(!m_predicateVectors.contains(vector));
    553 
    554     m_predicateVectors.add(vector);
    555 }
    556 
    557 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
    558 {
    559     if (vector == 0)
    560         return;
    561 
    562     ASSERT(m_predicateVectors.contains(vector));
    563 
    564     m_predicateVectors.remove(vector);
    565     delete vector;
    566 }
    567 
    568 
    569 void Parser::registerExpressionVector(Vector<Expression*>* vector)
    570 {
    571     if (vector == 0)
    572         return;
    573 
    574     ASSERT(!m_expressionVectors.contains(vector));
    575 
    576     m_expressionVectors.add(vector);
    577 }
    578 
    579 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
    580 {
    581     if (vector == 0)
    582         return;
    583 
    584     ASSERT(m_expressionVectors.contains(vector));
    585 
    586     m_expressionVectors.remove(vector);
    587     delete vector;
    588 }
    589 
    590 void Parser::registerString(String* s)
    591 {
    592     if (s == 0)
    593         return;
    594 
    595     ASSERT(!m_strings.contains(s));
    596 
    597     m_strings.add(s);
    598 }
    599 
    600 void Parser::deleteString(String* s)
    601 {
    602     if (s == 0)
    603         return;
    604 
    605     ASSERT(m_strings.contains(s));
    606 
    607     m_strings.remove(s);
    608     delete s;
    609 }
    610 
    611 void Parser::registerNodeTest(Step::NodeTest* t)
    612 {
    613     if (t == 0)
    614         return;
    615 
    616     ASSERT(!m_nodeTests.contains(t));
    617 
    618     m_nodeTests.add(t);
    619 }
    620 
    621 void Parser::deleteNodeTest(Step::NodeTest* t)
    622 {
    623     if (t == 0)
    624         return;
    625 
    626     ASSERT(m_nodeTests.contains(t));
    627 
    628     m_nodeTests.remove(t);
    629     delete t;
    630 }
    631 
    632 #endif // ENABLE(XPATH)
    633