1 /* 2 * Copyright 2005 Maksim Orlovich <maksim (at) kde.org> 3 * Copyright (C) 2006 Apple Computer, Inc. 4 * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 #include "XPathParser.h" 30 31 #if ENABLE(XPATH) 32 33 #include "ExceptionCode.h" 34 #include "XPathEvaluator.h" 35 #include "XPathException.h" 36 #include "XPathNSResolver.h" 37 #include "XPathStep.h" 38 #include <wtf/StdLibExtras.h> 39 #include <wtf/text/StringHash.h> 40 41 int xpathyyparse(void*); 42 43 using namespace WTF; 44 using namespace Unicode; 45 46 namespace WebCore { 47 namespace XPath { 48 49 class LocationPath; 50 51 #include "XPathGrammar.h" 52 53 Parser* Parser::currentParser = 0; 54 55 enum XMLCat { NameStart, NameCont, NotPartOfName }; 56 57 typedef HashMap<String, Step::Axis> AxisNamesMap; 58 59 static XMLCat charCat(UChar aChar) 60 { 61 //### might need to add some special cases from the XML spec. 62 63 if (aChar == '_') 64 return NameStart; 65 66 if (aChar == '.' || aChar == '-') 67 return NameCont; 68 CharCategory category = Unicode::category(aChar); 69 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) 70 return NameStart; 71 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) 72 return NameCont; 73 return NotPartOfName; 74 } 75 76 static void setUpAxisNamesMap(AxisNamesMap& axisNames) 77 { 78 struct AxisName { 79 const char* name; 80 Step::Axis axis; 81 }; 82 const AxisName axisNameList[] = { 83 { "ancestor", Step::AncestorAxis }, 84 { "ancestor-or-self", Step::AncestorOrSelfAxis }, 85 { "attribute", Step::AttributeAxis }, 86 { "child", Step::ChildAxis }, 87 { "descendant", Step::DescendantAxis }, 88 { "descendant-or-self", Step::DescendantOrSelfAxis }, 89 { "following", Step::FollowingAxis }, 90 { "following-sibling", Step::FollowingSiblingAxis }, 91 { "namespace", Step::NamespaceAxis }, 92 { "parent", Step::ParentAxis }, 93 { "preceding", Step::PrecedingAxis }, 94 { "preceding-sibling", Step::PrecedingSiblingAxis }, 95 { "self", Step::SelfAxis } 96 }; 97 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) 98 axisNames.set(axisNameList[i].name, axisNameList[i].axis); 99 } 100 101 static bool isAxisName(const String& name, Step::Axis& type) 102 { 103 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ()); 104 105 if (axisNames.isEmpty()) 106 setUpAxisNamesMap(axisNames); 107 108 AxisNamesMap::iterator it = axisNames.find(name); 109 if (it == axisNames.end()) 110 return false; 111 type = it->second; 112 return true; 113 } 114 115 static bool isNodeTypeName(const String& name) 116 { 117 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ()); 118 if (nodeTypeNames.isEmpty()) { 119 nodeTypeNames.add("comment"); 120 nodeTypeNames.add("text"); 121 nodeTypeNames.add("processing-instruction"); 122 nodeTypeNames.add("node"); 123 } 124 return nodeTypeNames.contains(name); 125 } 126 127 // Returns whether the current token can possibly be a binary operator, given 128 // the previous token. Necessary to disambiguate some of the operators 129 // (* (multiply), div, and, or, mod) in the [32] Operator rule 130 // (check http://www.w3.org/TR/xpath#exprlex). 131 bool Parser::isBinaryOperatorContext() const 132 { 133 switch (m_lastTokenType) { 134 case 0: 135 case '@': case AXISNAME: case '(': case '[': case ',': 136 case AND: case OR: case MULOP: 137 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: 138 case EQOP: case RELOP: 139 return false; 140 default: 141 return true; 142 } 143 } 144 145 void Parser::skipWS() 146 { 147 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) 148 ++m_nextPos; 149 } 150 151 Token Parser::makeTokenAndAdvance(int code, int advance) 152 { 153 m_nextPos += advance; 154 return Token(code); 155 } 156 157 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) 158 { 159 m_nextPos += advance; 160 return Token(code, val); 161 } 162 163 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) 164 { 165 m_nextPos += advance; 166 return Token(code, val); 167 } 168 169 // Returns next char if it's there and interesting, 0 otherwise 170 char Parser::peekAheadHelper() 171 { 172 if (m_nextPos + 1 >= m_data.length()) 173 return 0; 174 UChar next = m_data[m_nextPos + 1]; 175 if (next >= 0xff) 176 return 0; 177 return next; 178 } 179 180 char Parser::peekCurHelper() 181 { 182 if (m_nextPos >= m_data.length()) 183 return 0; 184 UChar next = m_data[m_nextPos]; 185 if (next >= 0xff) 186 return 0; 187 return next; 188 } 189 190 Token Parser::lexString() 191 { 192 UChar delimiter = m_data[m_nextPos]; 193 int startPos = m_nextPos + 1; 194 195 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { 196 if (m_data[m_nextPos] == delimiter) { 197 String value = m_data.substring(startPos, m_nextPos - startPos); 198 if (value.isNull()) 199 value = ""; 200 ++m_nextPos; // Consume the char. 201 return Token(LITERAL, value); 202 } 203 } 204 205 // Ouch, went off the end -- report error. 206 return Token(XPATH_ERROR); 207 } 208 209 Token Parser::lexNumber() 210 { 211 int startPos = m_nextPos; 212 bool seenDot = false; 213 214 // Go until end or a non-digits character. 215 for (; m_nextPos < m_data.length(); ++m_nextPos) { 216 UChar aChar = m_data[m_nextPos]; 217 if (aChar >= 0xff) break; 218 219 if (aChar < '0' || aChar > '9') { 220 if (aChar == '.' && !seenDot) 221 seenDot = true; 222 else 223 break; 224 } 225 } 226 227 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); 228 } 229 230 bool Parser::lexNCName(String& name) 231 { 232 int startPos = m_nextPos; 233 if (m_nextPos >= m_data.length()) 234 return false; 235 236 if (charCat(m_data[m_nextPos]) != NameStart) 237 return false; 238 239 // Keep going until we get a character that's not good for names. 240 for (; m_nextPos < m_data.length(); ++m_nextPos) 241 if (charCat(m_data[m_nextPos]) == NotPartOfName) 242 break; 243 244 name = m_data.substring(startPos, m_nextPos - startPos); 245 return true; 246 } 247 248 bool Parser::lexQName(String& name) 249 { 250 String n1; 251 if (!lexNCName(n1)) 252 return false; 253 254 skipWS(); 255 256 // If the next character is :, what we just got it the prefix, if not, 257 // it's the whole thing. 258 if (peekAheadHelper() != ':') { 259 name = n1; 260 return true; 261 } 262 263 String n2; 264 if (!lexNCName(n2)) 265 return false; 266 267 name = n1 + ":" + n2; 268 return true; 269 } 270 271 Token Parser::nextTokenInternal() 272 { 273 skipWS(); 274 275 if (m_nextPos >= m_data.length()) 276 return Token(0); 277 278 char code = peekCurHelper(); 279 switch (code) { 280 case '(': case ')': case '[': case ']': 281 case '@': case ',': case '|': 282 return makeTokenAndAdvance(code); 283 case '\'': 284 case '\"': 285 return lexString(); 286 case '0': case '1': case '2': case '3': case '4': 287 case '5': case '6': case '7': case '8': case '9': 288 return lexNumber(); 289 case '.': { 290 char next = peekAheadHelper(); 291 if (next == '.') 292 return makeTokenAndAdvance(DOTDOT, 2); 293 if (next >= '0' && next <= '9') 294 return lexNumber(); 295 return makeTokenAndAdvance('.'); 296 } 297 case '/': 298 if (peekAheadHelper() == '/') 299 return makeTokenAndAdvance(SLASHSLASH, 2); 300 return makeTokenAndAdvance('/'); 301 case '+': 302 return makeTokenAndAdvance(PLUS); 303 case '-': 304 return makeTokenAndAdvance(MINUS); 305 case '=': 306 return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); 307 case '!': 308 if (peekAheadHelper() == '=') 309 return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); 310 return Token(XPATH_ERROR); 311 case '<': 312 if (peekAheadHelper() == '=') 313 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); 314 return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); 315 case '>': 316 if (peekAheadHelper() == '=') 317 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); 318 return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); 319 case '*': 320 if (isBinaryOperatorContext()) 321 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); 322 ++m_nextPos; 323 return Token(NAMETEST, "*"); 324 case '$': { // $ QName 325 m_nextPos++; 326 String name; 327 if (!lexQName(name)) 328 return Token(XPATH_ERROR); 329 return Token(VARIABLEREFERENCE, name); 330 } 331 } 332 333 String name; 334 if (!lexNCName(name)) 335 return Token(XPATH_ERROR); 336 337 skipWS(); 338 // If we're in an operator context, check for any operator names 339 if (isBinaryOperatorContext()) { 340 if (name == "and") //### hash? 341 return Token(AND); 342 if (name == "or") 343 return Token(OR); 344 if (name == "mod") 345 return Token(MULOP, NumericOp::OP_Mod); 346 if (name == "div") 347 return Token(MULOP, NumericOp::OP_Div); 348 } 349 350 // See whether we are at a : 351 if (peekCurHelper() == ':') { 352 m_nextPos++; 353 // Any chance it's an axis name? 354 if (peekCurHelper() == ':') { 355 m_nextPos++; 356 357 //It might be an axis name. 358 Step::Axis axis; 359 if (isAxisName(name, axis)) 360 return Token(AXISNAME, axis); 361 // Ugh, :: is only valid in axis names -> error 362 return Token(XPATH_ERROR); 363 } 364 365 // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest 366 skipWS(); 367 if (peekCurHelper() == '*') { 368 m_nextPos++; 369 return Token(NAMETEST, name + ":*"); 370 } 371 372 // Make a full qname. 373 String n2; 374 if (!lexNCName(n2)) 375 return Token(XPATH_ERROR); 376 377 name = name + ":" + n2; 378 } 379 380 skipWS(); 381 if (peekCurHelper() == '(') { 382 //note: we don't swallow the (here! 383 384 //either node type of function name 385 if (isNodeTypeName(name)) { 386 if (name == "processing-instruction") 387 return Token(PI, name); 388 389 return Token(NODETYPE, name); 390 } 391 //must be a function name. 392 return Token(FUNCTIONNAME, name); 393 } 394 395 // At this point, it must be NAMETEST. 396 return Token(NAMETEST, name); 397 } 398 399 Token Parser::nextToken() 400 { 401 Token toRet = nextTokenInternal(); 402 m_lastTokenType = toRet.type; 403 return toRet; 404 } 405 406 Parser::Parser() 407 { 408 reset(String()); 409 } 410 411 Parser::~Parser() 412 { 413 } 414 415 void Parser::reset(const String& data) 416 { 417 m_nextPos = 0; 418 m_data = data; 419 m_lastTokenType = 0; 420 421 m_topExpr = 0; 422 m_gotNamespaceError = false; 423 } 424 425 int Parser::lex(void* data) 426 { 427 YYSTYPE* yylval = static_cast<YYSTYPE*>(data); 428 Token tok = nextToken(); 429 430 switch (tok.type) { 431 case AXISNAME: 432 yylval->axis = tok.axis; 433 break; 434 case MULOP: 435 yylval->numop = tok.numop; 436 break; 437 case RELOP: 438 case EQOP: 439 yylval->eqop = tok.eqop; 440 break; 441 case NODETYPE: 442 case PI: 443 case FUNCTIONNAME: 444 case LITERAL: 445 case VARIABLEREFERENCE: 446 case NUMBER: 447 case NAMETEST: 448 yylval->str = new String(tok.str); 449 registerString(yylval->str); 450 break; 451 } 452 453 return tok.type; 454 } 455 456 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI) 457 { 458 size_t colon = qName.find(':'); 459 if (colon != notFound) { 460 if (!m_resolver) 461 return false; 462 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); 463 if (namespaceURI.isNull()) 464 return false; 465 localName = qName.substring(colon + 1); 466 } else 467 localName = qName; 468 469 return true; 470 } 471 472 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec) 473 { 474 reset(statement); 475 476 m_resolver = resolver; 477 478 Parser* oldParser = currentParser; 479 currentParser = this; 480 int parseError = xpathyyparse(this); 481 currentParser = oldParser; 482 483 if (parseError) { 484 deleteAllValues(m_parseNodes); 485 m_parseNodes.clear(); 486 487 HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end(); 488 for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) { 489 deleteAllValues(**it); 490 delete *it; 491 } 492 m_predicateVectors.clear(); 493 494 HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end(); 495 for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) { 496 deleteAllValues(**it); 497 delete *it; 498 } 499 m_expressionVectors.clear(); 500 501 deleteAllValues(m_strings); 502 m_strings.clear(); 503 504 deleteAllValues(m_nodeTests); 505 m_nodeTests.clear(); 506 507 m_topExpr = 0; 508 509 if (m_gotNamespaceError) 510 ec = NAMESPACE_ERR; 511 else 512 ec = XPathException::INVALID_EXPRESSION_ERR; 513 return 0; 514 } 515 516 ASSERT(m_parseNodes.size() == 1); 517 ASSERT(*m_parseNodes.begin() == m_topExpr); 518 ASSERT(m_expressionVectors.size() == 0); 519 ASSERT(m_predicateVectors.size() == 0); 520 ASSERT(m_strings.size() == 0); 521 ASSERT(m_nodeTests.size() == 0); 522 523 m_parseNodes.clear(); 524 Expression* result = m_topExpr; 525 m_topExpr = 0; 526 527 return result; 528 } 529 530 void Parser::registerParseNode(ParseNode* node) 531 { 532 if (node == 0) 533 return; 534 535 ASSERT(!m_parseNodes.contains(node)); 536 537 m_parseNodes.add(node); 538 } 539 540 void Parser::unregisterParseNode(ParseNode* node) 541 { 542 if (node == 0) 543 return; 544 545 ASSERT(m_parseNodes.contains(node)); 546 547 m_parseNodes.remove(node); 548 } 549 550 void Parser::registerPredicateVector(Vector<Predicate*>* vector) 551 { 552 if (vector == 0) 553 return; 554 555 ASSERT(!m_predicateVectors.contains(vector)); 556 557 m_predicateVectors.add(vector); 558 } 559 560 void Parser::deletePredicateVector(Vector<Predicate*>* vector) 561 { 562 if (vector == 0) 563 return; 564 565 ASSERT(m_predicateVectors.contains(vector)); 566 567 m_predicateVectors.remove(vector); 568 delete vector; 569 } 570 571 572 void Parser::registerExpressionVector(Vector<Expression*>* vector) 573 { 574 if (vector == 0) 575 return; 576 577 ASSERT(!m_expressionVectors.contains(vector)); 578 579 m_expressionVectors.add(vector); 580 } 581 582 void Parser::deleteExpressionVector(Vector<Expression*>* vector) 583 { 584 if (vector == 0) 585 return; 586 587 ASSERT(m_expressionVectors.contains(vector)); 588 589 m_expressionVectors.remove(vector); 590 delete vector; 591 } 592 593 void Parser::registerString(String* s) 594 { 595 if (s == 0) 596 return; 597 598 ASSERT(!m_strings.contains(s)); 599 600 m_strings.add(s); 601 } 602 603 void Parser::deleteString(String* s) 604 { 605 if (s == 0) 606 return; 607 608 ASSERT(m_strings.contains(s)); 609 610 m_strings.remove(s); 611 delete s; 612 } 613 614 void Parser::registerNodeTest(Step::NodeTest* t) 615 { 616 if (t == 0) 617 return; 618 619 ASSERT(!m_nodeTests.contains(t)); 620 621 m_nodeTests.add(t); 622 } 623 624 void Parser::deleteNodeTest(Step::NodeTest* t) 625 { 626 if (t == 0) 627 return; 628 629 ASSERT(m_nodeTests.contains(t)); 630 631 m_nodeTests.remove(t); 632 delete t; 633 } 634 635 } 636 } 637 638 #endif // ENABLE(XPATH) 639