1 /* 2 * Copyright 2005 Maksim Orlovich <maksim (at) kde.org> 3 * Copyright (C) 2006 Apple Computer, Inc. 4 * Copyright (C) 2007 Alexey Proskuryakov <ap (at) webkit.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 #include "core/xml/XPathParser.h" 30 31 #include "bindings/core/v8/ExceptionState.h" 32 #include "core/XPathGrammar.h" 33 #include "core/dom/ExceptionCode.h" 34 #include "core/xml/XPathEvaluator.h" 35 #include "core/xml/XPathNSResolver.h" 36 #include "core/xml/XPathPath.h" 37 #include "wtf/StdLibExtras.h" 38 #include "wtf/text/StringHash.h" 39 40 using namespace blink; 41 using namespace WTF; 42 using namespace Unicode; 43 using namespace XPath; 44 45 Parser* Parser::currentParser = 0; 46 47 enum XMLCat { NameStart, NameCont, NotPartOfName }; 48 49 typedef HashMap<String, Step::Axis> AxisNamesMap; 50 51 static XMLCat charCat(UChar aChar) 52 { 53 // might need to add some special cases from the XML spec. 54 55 if (aChar == '_') 56 return NameStart; 57 58 if (aChar == '.' || aChar == '-') 59 return NameCont; 60 CharCategory category = Unicode::category(aChar); 61 if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) 62 return NameStart; 63 if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) 64 return NameCont; 65 return NotPartOfName; 66 } 67 68 static void setUpAxisNamesMap(AxisNamesMap& axisNames) 69 { 70 struct AxisName { 71 const char* name; 72 Step::Axis axis; 73 }; 74 const AxisName axisNameList[] = { 75 { "ancestor", Step::AncestorAxis }, 76 { "ancestor-or-self", Step::AncestorOrSelfAxis }, 77 { "attribute", Step::AttributeAxis }, 78 { "child", Step::ChildAxis }, 79 { "descendant", Step::DescendantAxis }, 80 { "descendant-or-self", Step::DescendantOrSelfAxis }, 81 { "following", Step::FollowingAxis }, 82 { "following-sibling", Step::FollowingSiblingAxis }, 83 { "namespace", Step::NamespaceAxis }, 84 { "parent", Step::ParentAxis }, 85 { "preceding", Step::PrecedingAxis }, 86 { "preceding-sibling", Step::PrecedingSiblingAxis }, 87 { "self", Step::SelfAxis } 88 }; 89 for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) 90 axisNames.set(axisNameList[i].name, axisNameList[i].axis); 91 } 92 93 static bool isAxisName(const String& name, Step::Axis& type) 94 { 95 DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ()); 96 97 if (axisNames.isEmpty()) 98 setUpAxisNamesMap(axisNames); 99 100 AxisNamesMap::iterator it = axisNames.find(name); 101 if (it == axisNames.end()) 102 return false; 103 type = it->value; 104 return true; 105 } 106 107 static bool isNodeTypeName(const String& name) 108 { 109 DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ()); 110 if (nodeTypeNames.isEmpty()) { 111 nodeTypeNames.add("comment"); 112 nodeTypeNames.add("text"); 113 nodeTypeNames.add("processing-instruction"); 114 nodeTypeNames.add("node"); 115 } 116 return nodeTypeNames.contains(name); 117 } 118 119 // Returns whether the current token can possibly be a binary operator, given 120 // the previous token. Necessary to disambiguate some of the operators 121 // (* (multiply), div, and, or, mod) in the [32] Operator rule 122 // (check http://www.w3.org/TR/xpath#exprlex). 123 bool Parser::isBinaryOperatorContext() const 124 { 125 switch (m_lastTokenType) { 126 case 0: 127 case '@': case AXISNAME: case '(': case '[': case ',': 128 case AND: case OR: case MULOP: 129 case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: 130 case EQOP: case RELOP: 131 return false; 132 default: 133 return true; 134 } 135 } 136 137 void Parser::skipWS() 138 { 139 while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) 140 ++m_nextPos; 141 } 142 143 Token Parser::makeTokenAndAdvance(int code, int advance) 144 { 145 m_nextPos += advance; 146 return Token(code); 147 } 148 149 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) 150 { 151 m_nextPos += advance; 152 return Token(code, val); 153 } 154 155 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) 156 { 157 m_nextPos += advance; 158 return Token(code, val); 159 } 160 161 // Returns next char if it's there and interesting, 0 otherwise 162 char Parser::peekAheadHelper() 163 { 164 if (m_nextPos + 1 >= m_data.length()) 165 return 0; 166 UChar next = m_data[m_nextPos + 1]; 167 if (next >= 0xff) 168 return 0; 169 return next; 170 } 171 172 char Parser::peekCurHelper() 173 { 174 if (m_nextPos >= m_data.length()) 175 return 0; 176 UChar next = m_data[m_nextPos]; 177 if (next >= 0xff) 178 return 0; 179 return next; 180 } 181 182 Token Parser::lexString() 183 { 184 UChar delimiter = m_data[m_nextPos]; 185 int startPos = m_nextPos + 1; 186 187 for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { 188 if (m_data[m_nextPos] == delimiter) { 189 String value = m_data.substring(startPos, m_nextPos - startPos); 190 if (value.isNull()) 191 value = ""; 192 ++m_nextPos; // Consume the char. 193 return Token(LITERAL, value); 194 } 195 } 196 197 // Ouch, went off the end -- report error. 198 return Token(XPATH_ERROR); 199 } 200 201 Token Parser::lexNumber() 202 { 203 int startPos = m_nextPos; 204 bool seenDot = false; 205 206 // Go until end or a non-digits character. 207 for (; m_nextPos < m_data.length(); ++m_nextPos) { 208 UChar aChar = m_data[m_nextPos]; 209 if (aChar >= 0xff) 210 break; 211 212 if (aChar < '0' || aChar > '9') { 213 if (aChar == '.' && !seenDot) 214 seenDot = true; 215 else 216 break; 217 } 218 } 219 220 return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); 221 } 222 223 bool Parser::lexNCName(String& name) 224 { 225 int startPos = m_nextPos; 226 if (m_nextPos >= m_data.length()) 227 return false; 228 229 if (charCat(m_data[m_nextPos]) != NameStart) 230 return false; 231 232 // Keep going until we get a character that's not good for names. 233 for (; m_nextPos < m_data.length(); ++m_nextPos) { 234 if (charCat(m_data[m_nextPos]) == NotPartOfName) 235 break; 236 } 237 238 name = m_data.substring(startPos, m_nextPos - startPos); 239 return true; 240 } 241 242 bool Parser::lexQName(String& name) 243 { 244 String n1; 245 if (!lexNCName(n1)) 246 return false; 247 248 skipWS(); 249 250 // If the next character is :, what we just got it the prefix, if not, 251 // it's the whole thing. 252 if (peekAheadHelper() != ':') { 253 name = n1; 254 return true; 255 } 256 257 String n2; 258 if (!lexNCName(n2)) 259 return false; 260 261 name = n1 + ":" + n2; 262 return true; 263 } 264 265 Token Parser::nextTokenInternal() 266 { 267 skipWS(); 268 269 if (m_nextPos >= m_data.length()) 270 return Token(0); 271 272 char code = peekCurHelper(); 273 switch (code) { 274 case '(': case ')': case '[': case ']': 275 case '@': case ',': case '|': 276 return makeTokenAndAdvance(code); 277 case '\'': 278 case '\"': 279 return lexString(); 280 case '0': case '1': case '2': case '3': case '4': 281 case '5': case '6': case '7': case '8': case '9': 282 return lexNumber(); 283 case '.': { 284 char next = peekAheadHelper(); 285 if (next == '.') 286 return makeTokenAndAdvance(DOTDOT, 2); 287 if (next >= '0' && next <= '9') 288 return lexNumber(); 289 return makeTokenAndAdvance('.'); 290 } 291 case '/': 292 if (peekAheadHelper() == '/') 293 return makeTokenAndAdvance(SLASHSLASH, 2); 294 return makeTokenAndAdvance('/'); 295 case '+': 296 return makeTokenAndAdvance(PLUS); 297 case '-': 298 return makeTokenAndAdvance(MINUS); 299 case '=': 300 return makeTokenAndAdvance(EQOP, EqTestOp::OpcodeEqual); 301 case '!': 302 if (peekAheadHelper() == '=') 303 return makeTokenAndAdvance(EQOP, EqTestOp::OpcodeNotEqual, 2); 304 return Token(XPATH_ERROR); 305 case '<': 306 if (peekAheadHelper() == '=') 307 return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeLessOrEqual, 2); 308 return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeLessThan); 309 case '>': 310 if (peekAheadHelper() == '=') 311 return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeGreaterOrEqual, 2); 312 return makeTokenAndAdvance(RELOP, EqTestOp::OpcodeGreaterThan); 313 case '*': 314 if (isBinaryOperatorContext()) 315 return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); 316 ++m_nextPos; 317 return Token(NAMETEST, "*"); 318 case '$': { // $ QName 319 m_nextPos++; 320 String name; 321 if (!lexQName(name)) 322 return Token(XPATH_ERROR); 323 return Token(VARIABLEREFERENCE, name); 324 } 325 } 326 327 String name; 328 if (!lexNCName(name)) 329 return Token(XPATH_ERROR); 330 331 skipWS(); 332 // If we're in an operator context, check for any operator names 333 if (isBinaryOperatorContext()) { 334 if (name == "and") // ### hash? 335 return Token(AND); 336 if (name == "or") 337 return Token(OR); 338 if (name == "mod") 339 return Token(MULOP, NumericOp::OP_Mod); 340 if (name == "div") 341 return Token(MULOP, NumericOp::OP_Div); 342 } 343 344 // See whether we are at a : 345 if (peekCurHelper() == ':') { 346 m_nextPos++; 347 // Any chance it's an axis name? 348 if (peekCurHelper() == ':') { 349 m_nextPos++; 350 351 // It might be an axis name. 352 Step::Axis axis; 353 if (isAxisName(name, axis)) 354 return Token(AXISNAME, axis); 355 // Ugh, :: is only valid in axis names -> error 356 return Token(XPATH_ERROR); 357 } 358 359 // Seems like this is a fully qualified qname, or perhaps the * modified 360 // one from NameTest 361 skipWS(); 362 if (peekCurHelper() == '*') { 363 m_nextPos++; 364 return Token(NAMETEST, name + ":*"); 365 } 366 367 // Make a full qname. 368 String n2; 369 if (!lexNCName(n2)) 370 return Token(XPATH_ERROR); 371 372 name = name + ":" + n2; 373 } 374 375 skipWS(); 376 if (peekCurHelper() == '(') { 377 // Note: we don't swallow the ( here! 378 379 // Either node type of function name 380 if (isNodeTypeName(name)) { 381 if (name == "processing-instruction") 382 return Token(PI, name); 383 384 return Token(NODETYPE, name); 385 } 386 // Must be a function name. 387 return Token(FUNCTIONNAME, name); 388 } 389 390 // At this point, it must be NAMETEST. 391 return Token(NAMETEST, name); 392 } 393 394 Token Parser::nextToken() 395 { 396 Token toRet = nextTokenInternal(); 397 m_lastTokenType = toRet.type; 398 return toRet; 399 } 400 401 Parser::Parser() 402 { 403 reset(String()); 404 } 405 406 Parser::~Parser() 407 { 408 } 409 410 void Parser::reset(const String& data) 411 { 412 m_nextPos = 0; 413 m_data = data; 414 m_lastTokenType = 0; 415 416 m_topExpr = nullptr; 417 m_gotNamespaceError = false; 418 } 419 420 int Parser::lex(void* data) 421 { 422 YYSTYPE* yylval = static_cast<YYSTYPE*>(data); 423 Token tok = nextToken(); 424 425 switch (tok.type) { 426 case AXISNAME: 427 yylval->axis = tok.axis; 428 break; 429 case MULOP: 430 yylval->numop = tok.numop; 431 break; 432 case RELOP: 433 case EQOP: 434 yylval->eqop = tok.eqop; 435 break; 436 case NODETYPE: 437 case PI: 438 case FUNCTIONNAME: 439 case LITERAL: 440 case VARIABLEREFERENCE: 441 case NUMBER: 442 case NAMETEST: 443 yylval->str = new String(tok.str); 444 registerString(yylval->str); 445 break; 446 } 447 448 return tok.type; 449 } 450 451 bool Parser::expandQName(const String& qName, AtomicString& localName, AtomicString& namespaceURI) 452 { 453 size_t colon = qName.find(':'); 454 if (colon != kNotFound) { 455 if (!m_resolver) 456 return false; 457 namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); 458 if (namespaceURI.isNull()) 459 return false; 460 localName = AtomicString(qName.substring(colon + 1)); 461 } else { 462 localName = AtomicString(qName); 463 } 464 465 return true; 466 } 467 468 PassOwnPtrWillBeRawPtr<Expression> Parser::parseStatement(const String& statement, PassRefPtrWillBeRawPtr<XPathNSResolver> resolver, ExceptionState& exceptionState) 469 { 470 reset(statement); 471 472 m_resolver = resolver; 473 474 Parser* oldParser = currentParser; 475 currentParser = this; 476 int parseError = xpathyyparse(this); 477 currentParser = oldParser; 478 479 if (parseError) { 480 #if !ENABLE(OILPAN) 481 while (!m_parseNodes.isEmpty()) 482 delete m_parseNodes.takeAny(); 483 484 HashSet<Vector<OwnPtr<Predicate> >*>::iterator pend = m_predicateVectors.end(); 485 for (HashSet<Vector<OwnPtr<Predicate> >*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) 486 delete *it; 487 m_predicateVectors.clear(); 488 489 HashSet<Vector<OwnPtr<Expression> >*>::iterator eend = m_expressionVectors.end(); 490 for (HashSet<Vector<OwnPtr<Expression> >*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) 491 delete *it; 492 m_expressionVectors.clear(); 493 494 m_nodeTests.clear(); 495 #endif 496 497 m_strings.clear(); 498 499 m_topExpr = nullptr; 500 501 if (m_gotNamespaceError) 502 exceptionState.throwDOMException(NamespaceError, "The string '" + statement + "' contains unresolvable namespaces."); 503 else 504 exceptionState.throwDOMException(SyntaxError, "The string '" + statement + "' is not a valid XPath expression."); 505 return nullptr; 506 } 507 ASSERT(m_strings.size() == 0); 508 #if !ENABLE(OILPAN) 509 ASSERT(m_parseNodes.size() == 1); 510 ASSERT(*m_parseNodes.begin() == m_topExpr); 511 ASSERT(m_expressionVectors.size() == 0); 512 ASSERT(m_predicateVectors.size() == 0); 513 ASSERT(m_nodeTests.size() == 0); 514 m_parseNodes.clear(); 515 #endif 516 517 Expression* result = m_topExpr; 518 m_topExpr = nullptr; 519 520 return adoptPtrWillBeNoop(result); 521 } 522 523 void Parser::registerParseNode(ParseNode* node) 524 { 525 #if !ENABLE(OILPAN) 526 if (node == 0) 527 return; 528 529 ASSERT(!m_parseNodes.contains(node)); 530 531 m_parseNodes.add(node); 532 #endif 533 } 534 535 void Parser::unregisterParseNode(ParseNode* node) 536 { 537 #if !ENABLE(OILPAN) 538 if (node == 0) 539 return; 540 541 ASSERT(m_parseNodes.contains(node)); 542 543 m_parseNodes.remove(node); 544 #endif 545 } 546 547 void Parser::registerPredicateVector(WillBeHeapVector<OwnPtrWillBeMember<Predicate> >* vector) 548 { 549 #if !ENABLE(OILPAN) 550 if (vector == 0) 551 return; 552 553 ASSERT(!m_predicateVectors.contains(vector)); 554 555 m_predicateVectors.add(vector); 556 #endif 557 } 558 559 void Parser::deletePredicateVector(WillBeHeapVector<OwnPtrWillBeMember<Predicate> >* vector) 560 { 561 #if !ENABLE(OILPAN) 562 if (vector == 0) 563 return; 564 565 ASSERT(m_predicateVectors.contains(vector)); 566 567 m_predicateVectors.remove(vector); 568 delete vector; 569 #endif 570 } 571 572 573 void Parser::registerExpressionVector(WillBeHeapVector<OwnPtrWillBeMember<Expression> >* vector) 574 { 575 #if !ENABLE(OILPAN) 576 if (vector == 0) 577 return; 578 579 ASSERT(!m_expressionVectors.contains(vector)); 580 581 m_expressionVectors.add(vector); 582 #endif 583 } 584 585 void Parser::deleteExpressionVector(WillBeHeapVector<OwnPtrWillBeMember<Expression> >* vector) 586 { 587 #if !ENABLE(OILPAN) 588 if (vector == 0) 589 return; 590 591 ASSERT(m_expressionVectors.contains(vector)); 592 593 m_expressionVectors.remove(vector); 594 delete vector; 595 #endif 596 } 597 598 void Parser::registerString(String* s) 599 { 600 if (s == 0) 601 return; 602 603 ASSERT(!m_strings.contains(s)); 604 605 m_strings.add(adoptPtr(s)); 606 } 607 608 void Parser::deleteString(String* s) 609 { 610 if (s == 0) 611 return; 612 613 ASSERT(m_strings.contains(s)); 614 615 m_strings.remove(s); 616 } 617 618 void Parser::registerNodeTest(Step::NodeTest* t) 619 { 620 #if !ENABLE(OILPAN) 621 if (t == 0) 622 return; 623 624 ASSERT(!m_nodeTests.contains(t)); 625 626 m_nodeTests.add(adoptPtr(t)); 627 #endif 628 } 629 630 void Parser::deleteNodeTest(Step::NodeTest* t) 631 { 632 #if !ENABLE(OILPAN) 633 if (t == 0) 634 return; 635 636 ASSERT(m_nodeTests.contains(t)); 637 638 m_nodeTests.remove(t); 639 #endif 640 } 641 642