Home | History | Annotate | Download | only in xml
      1 /*
      2  * Copyright 2005 Maksim Orlovich <maksim (at) kde.org>
      3  * Copyright (C) 2006 Apple Computer, Inc.
      4  * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org>
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  *
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "XPathParser.h"
     30 
     31 #if ENABLE(XPATH)
     32 
     33 #include "ExceptionCode.h"
     34 #include "XPathEvaluator.h"
     35 #include "XPathException.h"
     36 #include "XPathNSResolver.h"
     37 #include "XPathStep.h"
     38 #include <wtf/StdLibExtras.h>
     39 #include <wtf/text/StringHash.h>
     40 
     41 int xpathyyparse(void*);
     42 
     43 using namespace WTF;
     44 using namespace Unicode;
     45 
     46 namespace WebCore {
     47 namespace XPath {
     48 
     49 class LocationPath;
     50 
     51 #include "XPathGrammar.h"
     52 
     53 Parser* Parser::currentParser = 0;
     54 
     55 enum XMLCat { NameStart, NameCont, NotPartOfName };
     56 
     57 typedef HashMap<String, Step::Axis> AxisNamesMap;
     58 
     59 static XMLCat charCat(UChar aChar)
     60 {
     61     //### might need to add some special cases from the XML spec.
     62 
     63     if (aChar == '_')
     64         return NameStart;
     65 
     66     if (aChar == '.' || aChar == '-')
     67         return NameCont;
     68     CharCategory category = Unicode::category(aChar);
     69     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
     70         return NameStart;
     71     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
     72         return NameCont;
     73     return NotPartOfName;
     74 }
     75 
     76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
     77 {
     78     struct AxisName {
     79         const char* name;
     80         Step::Axis axis;
     81     };
     82     const AxisName axisNameList[] = {
     83         { "ancestor", Step::AncestorAxis },
     84         { "ancestor-or-self", Step::AncestorOrSelfAxis },
     85         { "attribute", Step::AttributeAxis },
     86         { "child", Step::ChildAxis },
     87         { "descendant", Step::DescendantAxis },
     88         { "descendant-or-self", Step::DescendantOrSelfAxis },
     89         { "following", Step::FollowingAxis },
     90         { "following-sibling", Step::FollowingSiblingAxis },
     91         { "namespace", Step::NamespaceAxis },
     92         { "parent", Step::ParentAxis },
     93         { "preceding", Step::PrecedingAxis },
     94         { "preceding-sibling", Step::PrecedingSiblingAxis },
     95         { "self", Step::SelfAxis }
     96     };
     97     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
     98         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
     99 }
    100 
    101 static bool isAxisName(const String& name, Step::Axis& type)
    102 {
    103     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
    104 
    105     if (axisNames.isEmpty())
    106         setUpAxisNamesMap(axisNames);
    107 
    108     AxisNamesMap::iterator it = axisNames.find(name);
    109     if (it == axisNames.end())
    110         return false;
    111     type = it->second;
    112     return true;
    113 }
    114 
    115 static bool isNodeTypeName(const String& name)
    116 {
    117     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
    118     if (nodeTypeNames.isEmpty()) {
    119         nodeTypeNames.add("comment");
    120         nodeTypeNames.add("text");
    121         nodeTypeNames.add("processing-instruction");
    122         nodeTypeNames.add("node");
    123     }
    124     return nodeTypeNames.contains(name);
    125 }
    126 
    127 // Returns whether the current token can possibly be a binary operator, given
    128 // the previous token. Necessary to disambiguate some of the operators
    129 // (* (multiply), div, and, or, mod) in the [32] Operator rule
    130 // (check http://www.w3.org/TR/xpath#exprlex).
    131 bool Parser::isBinaryOperatorContext() const
    132 {
    133     switch (m_lastTokenType) {
    134     case 0:
    135     case '@': case AXISNAME: case '(': case '[': case ',':
    136     case AND: case OR: case MULOP:
    137     case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
    138     case EQOP: case RELOP:
    139         return false;
    140     default:
    141         return true;
    142     }
    143 }
    144 
    145 void Parser::skipWS()
    146 {
    147     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
    148         ++m_nextPos;
    149 }
    150 
    151 Token Parser::makeTokenAndAdvance(int code, int advance)
    152 {
    153     m_nextPos += advance;
    154     return Token(code);
    155 }
    156 
    157 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
    158 {
    159     m_nextPos += advance;
    160     return Token(code, val);
    161 }
    162 
    163 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
    164 {
    165     m_nextPos += advance;
    166     return Token(code, val);
    167 }
    168 
    169 // Returns next char if it's there and interesting, 0 otherwise
    170 char Parser::peekAheadHelper()
    171 {
    172     if (m_nextPos + 1 >= m_data.length())
    173         return 0;
    174     UChar next = m_data[m_nextPos + 1];
    175     if (next >= 0xff)
    176         return 0;
    177     return next;
    178 }
    179 
    180 char Parser::peekCurHelper()
    181 {
    182     if (m_nextPos >= m_data.length())
    183         return 0;
    184     UChar next = m_data[m_nextPos];
    185     if (next >= 0xff)
    186         return 0;
    187     return next;
    188 }
    189 
    190 Token Parser::lexString()
    191 {
    192     UChar delimiter = m_data[m_nextPos];
    193     int startPos = m_nextPos + 1;
    194 
    195     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
    196         if (m_data[m_nextPos] == delimiter) {
    197             String value = m_data.substring(startPos, m_nextPos - startPos);
    198             if (value.isNull())
    199                 value = "";
    200             ++m_nextPos; // Consume the char.
    201             return Token(LITERAL, value);
    202         }
    203     }
    204 
    205     // Ouch, went off the end -- report error.
    206     return Token(XPATH_ERROR);
    207 }
    208 
    209 Token Parser::lexNumber()
    210 {
    211     int startPos = m_nextPos;
    212     bool seenDot = false;
    213 
    214     // Go until end or a non-digits character.
    215     for (; m_nextPos < m_data.length(); ++m_nextPos) {
    216         UChar aChar = m_data[m_nextPos];
    217         if (aChar >= 0xff) break;
    218 
    219         if (aChar < '0' || aChar > '9') {
    220             if (aChar == '.' && !seenDot)
    221                 seenDot = true;
    222             else
    223                 break;
    224         }
    225     }
    226 
    227     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
    228 }
    229 
    230 bool Parser::lexNCName(String& name)
    231 {
    232     int startPos = m_nextPos;
    233     if (m_nextPos >= m_data.length())
    234         return false;
    235 
    236     if (charCat(m_data[m_nextPos]) != NameStart)
    237         return false;
    238 
    239     // Keep going until we get a character that's not good for names.
    240     for (; m_nextPos < m_data.length(); ++m_nextPos)
    241         if (charCat(m_data[m_nextPos]) == NotPartOfName)
    242             break;
    243 
    244     name = m_data.substring(startPos, m_nextPos - startPos);
    245     return true;
    246 }
    247 
    248 bool Parser::lexQName(String& name)
    249 {
    250     String n1;
    251     if (!lexNCName(n1))
    252         return false;
    253 
    254     skipWS();
    255 
    256     // If the next character is :, what we just got it the prefix, if not,
    257     // it's the whole thing.
    258     if (peekAheadHelper() != ':') {
    259         name = n1;
    260         return true;
    261     }
    262 
    263     String n2;
    264     if (!lexNCName(n2))
    265         return false;
    266 
    267     name = n1 + ":" + n2;
    268     return true;
    269 }
    270 
    271 Token Parser::nextTokenInternal()
    272 {
    273     skipWS();
    274 
    275     if (m_nextPos >= m_data.length())
    276         return Token(0);
    277 
    278     char code = peekCurHelper();
    279     switch (code) {
    280     case '(': case ')': case '[': case ']':
    281     case '@': case ',': case '|':
    282         return makeTokenAndAdvance(code);
    283     case '\'':
    284     case '\"':
    285         return lexString();
    286     case '0': case '1': case '2': case '3': case '4':
    287     case '5': case '6': case '7': case '8': case '9':
    288         return lexNumber();
    289     case '.': {
    290         char next = peekAheadHelper();
    291         if (next == '.')
    292             return makeTokenAndAdvance(DOTDOT, 2);
    293         if (next >= '0' && next <= '9')
    294             return lexNumber();
    295         return makeTokenAndAdvance('.');
    296     }
    297     case '/':
    298         if (peekAheadHelper() == '/')
    299             return makeTokenAndAdvance(SLASHSLASH, 2);
    300         return makeTokenAndAdvance('/');
    301     case '+':
    302         return makeTokenAndAdvance(PLUS);
    303     case '-':
    304         return makeTokenAndAdvance(MINUS);
    305     case '=':
    306         return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
    307     case '!':
    308         if (peekAheadHelper() == '=')
    309             return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
    310         return Token(XPATH_ERROR);
    311     case '<':
    312         if (peekAheadHelper() == '=')
    313             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
    314         return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
    315     case '>':
    316         if (peekAheadHelper() == '=')
    317             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
    318         return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
    319     case '*':
    320         if (isBinaryOperatorContext())
    321             return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
    322         ++m_nextPos;
    323         return Token(NAMETEST, "*");
    324     case '$': { // $ QName
    325         m_nextPos++;
    326         String name;
    327         if (!lexQName(name))
    328             return Token(XPATH_ERROR);
    329         return Token(VARIABLEREFERENCE, name);
    330     }
    331     }
    332 
    333     String name;
    334     if (!lexNCName(name))
    335         return Token(XPATH_ERROR);
    336 
    337     skipWS();
    338     // If we're in an operator context, check for any operator names
    339     if (isBinaryOperatorContext()) {
    340         if (name == "and") //### hash?
    341             return Token(AND);
    342         if (name == "or")
    343             return Token(OR);
    344         if (name == "mod")
    345             return Token(MULOP, NumericOp::OP_Mod);
    346         if (name == "div")
    347             return Token(MULOP, NumericOp::OP_Div);
    348     }
    349 
    350     // See whether we are at a :
    351     if (peekCurHelper() == ':') {
    352         m_nextPos++;
    353         // Any chance it's an axis name?
    354         if (peekCurHelper() == ':') {
    355             m_nextPos++;
    356 
    357             //It might be an axis name.
    358             Step::Axis axis;
    359             if (isAxisName(name, axis))
    360                 return Token(AXISNAME, axis);
    361             // Ugh, :: is only valid in axis names -> error
    362             return Token(XPATH_ERROR);
    363         }
    364 
    365         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
    366         skipWS();
    367         if (peekCurHelper() == '*') {
    368             m_nextPos++;
    369             return Token(NAMETEST, name + ":*");
    370         }
    371 
    372         // Make a full qname.
    373         String n2;
    374         if (!lexNCName(n2))
    375             return Token(XPATH_ERROR);
    376 
    377         name = name + ":" + n2;
    378     }
    379 
    380     skipWS();
    381     if (peekCurHelper() == '(') {
    382         //note: we don't swallow the (here!
    383 
    384         //either node type of function name
    385         if (isNodeTypeName(name)) {
    386             if (name == "processing-instruction")
    387                 return Token(PI, name);
    388 
    389             return Token(NODETYPE, name);
    390         }
    391         //must be a function name.
    392         return Token(FUNCTIONNAME, name);
    393     }
    394 
    395     // At this point, it must be NAMETEST.
    396     return Token(NAMETEST, name);
    397 }
    398 
    399 Token Parser::nextToken()
    400 {
    401     Token toRet = nextTokenInternal();
    402     m_lastTokenType = toRet.type;
    403     return toRet;
    404 }
    405 
    406 Parser::Parser()
    407 {
    408     reset(String());
    409 }
    410 
    411 Parser::~Parser()
    412 {
    413 }
    414 
    415 void Parser::reset(const String& data)
    416 {
    417     m_nextPos = 0;
    418     m_data = data;
    419     m_lastTokenType = 0;
    420 
    421     m_topExpr = 0;
    422     m_gotNamespaceError = false;
    423 }
    424 
    425 int Parser::lex(void* data)
    426 {
    427     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
    428     Token tok = nextToken();
    429 
    430     switch (tok.type) {
    431     case AXISNAME:
    432         yylval->axis = tok.axis;
    433         break;
    434     case MULOP:
    435         yylval->numop = tok.numop;
    436         break;
    437     case RELOP:
    438     case EQOP:
    439         yylval->eqop = tok.eqop;
    440         break;
    441     case NODETYPE:
    442     case PI:
    443     case FUNCTIONNAME:
    444     case LITERAL:
    445     case VARIABLEREFERENCE:
    446     case NUMBER:
    447     case NAMETEST:
    448         yylval->str = new String(tok.str);
    449         registerString(yylval->str);
    450         break;
    451     }
    452 
    453     return tok.type;
    454 }
    455 
    456 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
    457 {
    458     size_t colon = qName.find(':');
    459     if (colon != notFound) {
    460         if (!m_resolver)
    461             return false;
    462         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
    463         if (namespaceURI.isNull())
    464             return false;
    465         localName = qName.substring(colon + 1);
    466     } else
    467         localName = qName;
    468 
    469     return true;
    470 }
    471 
    472 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
    473 {
    474     reset(statement);
    475 
    476     m_resolver = resolver;
    477 
    478     Parser* oldParser = currentParser;
    479     currentParser = this;
    480     int parseError = xpathyyparse(this);
    481     currentParser = oldParser;
    482 
    483     if (parseError) {
    484         deleteAllValues(m_parseNodes);
    485         m_parseNodes.clear();
    486 
    487         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
    488         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
    489             deleteAllValues(**it);
    490             delete *it;
    491         }
    492         m_predicateVectors.clear();
    493 
    494         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
    495         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
    496             deleteAllValues(**it);
    497             delete *it;
    498         }
    499         m_expressionVectors.clear();
    500 
    501         deleteAllValues(m_strings);
    502         m_strings.clear();
    503 
    504         deleteAllValues(m_nodeTests);
    505         m_nodeTests.clear();
    506 
    507         m_topExpr = 0;
    508 
    509         if (m_gotNamespaceError)
    510             ec = NAMESPACE_ERR;
    511         else
    512             ec = XPathException::INVALID_EXPRESSION_ERR;
    513         return 0;
    514     }
    515 
    516     ASSERT(m_parseNodes.size() == 1);
    517     ASSERT(*m_parseNodes.begin() == m_topExpr);
    518     ASSERT(m_expressionVectors.size() == 0);
    519     ASSERT(m_predicateVectors.size() == 0);
    520     ASSERT(m_strings.size() == 0);
    521     ASSERT(m_nodeTests.size() == 0);
    522 
    523     m_parseNodes.clear();
    524     Expression* result = m_topExpr;
    525     m_topExpr = 0;
    526 
    527     return result;
    528 }
    529 
    530 void Parser::registerParseNode(ParseNode* node)
    531 {
    532     if (node == 0)
    533         return;
    534 
    535     ASSERT(!m_parseNodes.contains(node));
    536 
    537     m_parseNodes.add(node);
    538 }
    539 
    540 void Parser::unregisterParseNode(ParseNode* node)
    541 {
    542     if (node == 0)
    543         return;
    544 
    545     ASSERT(m_parseNodes.contains(node));
    546 
    547     m_parseNodes.remove(node);
    548 }
    549 
    550 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
    551 {
    552     if (vector == 0)
    553         return;
    554 
    555     ASSERT(!m_predicateVectors.contains(vector));
    556 
    557     m_predicateVectors.add(vector);
    558 }
    559 
    560 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
    561 {
    562     if (vector == 0)
    563         return;
    564 
    565     ASSERT(m_predicateVectors.contains(vector));
    566 
    567     m_predicateVectors.remove(vector);
    568     delete vector;
    569 }
    570 
    571 
    572 void Parser::registerExpressionVector(Vector<Expression*>* vector)
    573 {
    574     if (vector == 0)
    575         return;
    576 
    577     ASSERT(!m_expressionVectors.contains(vector));
    578 
    579     m_expressionVectors.add(vector);
    580 }
    581 
    582 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
    583 {
    584     if (vector == 0)
    585         return;
    586 
    587     ASSERT(m_expressionVectors.contains(vector));
    588 
    589     m_expressionVectors.remove(vector);
    590     delete vector;
    591 }
    592 
    593 void Parser::registerString(String* s)
    594 {
    595     if (s == 0)
    596         return;
    597 
    598     ASSERT(!m_strings.contains(s));
    599 
    600     m_strings.add(s);
    601 }
    602 
    603 void Parser::deleteString(String* s)
    604 {
    605     if (s == 0)
    606         return;
    607 
    608     ASSERT(m_strings.contains(s));
    609 
    610     m_strings.remove(s);
    611     delete s;
    612 }
    613 
    614 void Parser::registerNodeTest(Step::NodeTest* t)
    615 {
    616     if (t == 0)
    617         return;
    618 
    619     ASSERT(!m_nodeTests.contains(t));
    620 
    621     m_nodeTests.add(t);
    622 }
    623 
    624 void Parser::deleteNodeTest(Step::NodeTest* t)
    625 {
    626     if (t == 0)
    627         return;
    628 
    629     ASSERT(m_nodeTests.contains(t));
    630 
    631     m_nodeTests.remove(t);
    632     delete t;
    633 }
    634 
    635 }
    636 }
    637 
    638 #endif // ENABLE(XPATH)
    639