1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "tools/gn/parser.h" 6 7 #include "base/logging.h" 8 #include "tools/gn/functions.h" 9 #include "tools/gn/operators.h" 10 #include "tools/gn/token.h" 11 12 // grammar: 13 // 14 // file := (statement)* 15 // statement := block | if | assignment 16 // block := '{' statement* '}' 17 // if := 'if' '(' expr ')' statement [ else ] 18 // else := 'else' (if | statement)* 19 // assignment := ident {'=' | '+=' | '-='} expr 20 21 enum Precedence { 22 PRECEDENCE_ASSIGNMENT = 1, 23 PRECEDENCE_OR = 2, 24 PRECEDENCE_AND = 3, 25 PRECEDENCE_EQUALITY = 4, 26 PRECEDENCE_RELATION = 5, 27 PRECEDENCE_SUM = 6, 28 PRECEDENCE_PREFIX = 7, 29 PRECEDENCE_CALL = 8, 30 }; 31 32 // The top-level for blocks/ifs is still recursive descent, the expression 33 // parser is a Pratt parser. The basic idea there is to have the precedences 34 // (and associativities) encoded relative to each other and only parse up 35 // until you hit something of that precedence. There's a dispatch table in 36 // expressions_ at the top of parser.cc that describes how each token 37 // dispatches if it's seen as either a prefix or infix operator, and if it's 38 // infix, what its precedence is. 39 // 40 // Refs: 41 // - http://javascript.crockford.com/tdop/tdop.html 42 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ 43 44 // Indexed by Token::Type. 45 ParserHelper Parser::expressions_[] = { 46 {NULL, NULL, -1}, // INVALID 47 {&Parser::Literal, NULL, -1}, // INTEGER 48 {&Parser::Literal, NULL, -1}, // STRING 49 {&Parser::Literal, NULL, -1}, // TRUE_TOKEN 50 {&Parser::Literal, NULL, -1}, // FALSE_TOKEN 51 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL 52 {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS 53 {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS 54 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS 55 {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS 56 {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL 57 {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL 58 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL 59 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL 60 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN 61 {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN 62 {NULL, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND 63 {NULL, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR 64 {&Parser::Not, NULL, -1}, // BANG 65 {&Parser::Group, NULL, -1}, // LEFT_PAREN 66 {NULL, NULL, -1}, // RIGHT_PAREN 67 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET 68 {NULL, NULL, -1}, // RIGHT_BRACKET 69 {NULL, NULL, -1}, // LEFT_BRACE 70 {NULL, NULL, -1}, // RIGHT_BRACE 71 {NULL, NULL, -1}, // IF 72 {NULL, NULL, -1}, // ELSE 73 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER 74 {NULL, NULL, -1}, // COMMA 75 {NULL, NULL, -1}, // COMMENT 76 }; 77 78 Parser::Parser(const std::vector<Token>& tokens, Err* err) 79 : tokens_(tokens), err_(err), cur_(0) { 80 } 81 82 Parser::~Parser() { 83 } 84 85 // static 86 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens, 87 Err* err) { 88 Parser p(tokens, err); 89 return p.ParseFile().PassAs<ParseNode>(); 90 } 91 92 // static 93 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens, 94 Err* err) { 95 Parser p(tokens, err); 96 return p.ParseExpression().Pass(); 97 } 98 99 bool Parser::IsAssignment(const ParseNode* node) const { 100 return node && node->AsBinaryOp() && 101 (node->AsBinaryOp()->op().type() == Token::EQUAL || 102 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS || 103 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS); 104 } 105 106 bool Parser::IsStatementBreak(Token::Type token_type) const { 107 switch (token_type) { 108 case Token::IDENTIFIER: 109 case Token::LEFT_BRACE: 110 case Token::RIGHT_BRACE: 111 case Token::IF: 112 case Token::ELSE: 113 return true; 114 default: 115 return false; 116 } 117 } 118 119 bool Parser::LookAhead(Token::Type type) { 120 if (at_end()) 121 return false; 122 return cur_token().type() == type; 123 } 124 125 bool Parser::Match(Token::Type type) { 126 if (!LookAhead(type)) 127 return false; 128 Consume(); 129 return true; 130 } 131 132 Token Parser::Consume(Token::Type type, const char* error_message) { 133 Token::Type types[1] = { type }; 134 return Consume(types, 1, error_message); 135 } 136 137 Token Parser::Consume(Token::Type* types, 138 size_t num_types, 139 const char* error_message) { 140 if (has_error()) { 141 // Don't overwrite current error, but make progress through tokens so that 142 // a loop that's expecting a particular token will still terminate. 143 cur_++; 144 return Token(Location(), Token::INVALID, base::StringPiece()); 145 } 146 if (at_end()) { 147 const char kEOFMsg[] = "I hit EOF instead."; 148 if (tokens_.empty()) 149 *err_ = Err(Location(), error_message, kEOFMsg); 150 else 151 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg); 152 return Token(Location(), Token::INVALID, base::StringPiece()); 153 } 154 155 for (size_t i = 0; i < num_types; ++i) { 156 if (cur_token().type() == types[i]) 157 return tokens_[cur_++]; 158 } 159 *err_ = Err(cur_token(), error_message); 160 return Token(Location(), Token::INVALID, base::StringPiece()); 161 } 162 163 Token Parser::Consume() { 164 return tokens_[cur_++]; 165 } 166 167 scoped_ptr<ParseNode> Parser::ParseExpression() { 168 return ParseExpression(0); 169 } 170 171 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) { 172 if (at_end()) 173 return scoped_ptr<ParseNode>(); 174 175 Token token = Consume(); 176 PrefixFunc prefix = expressions_[token.type()].prefix; 177 178 if (prefix == NULL) { 179 *err_ = Err(token, 180 std::string("Unexpected token '") + token.value().as_string() + 181 std::string("'")); 182 return scoped_ptr<ParseNode>(); 183 } 184 185 scoped_ptr<ParseNode> left = (this->*prefix)(token); 186 if (has_error()) 187 return left.Pass(); 188 189 while (!at_end() && !IsStatementBreak(cur_token().type()) && 190 precedence <= expressions_[cur_token().type()].precedence) { 191 token = Consume(); 192 InfixFunc infix = expressions_[token.type()].infix; 193 if (infix == NULL) { 194 *err_ = Err(token, 195 std::string("Unexpected token '") + 196 token.value().as_string() + std::string("'")); 197 return scoped_ptr<ParseNode>(); 198 } 199 left = (this->*infix)(left.Pass(), token); 200 if (has_error()) 201 return scoped_ptr<ParseNode>(); 202 } 203 204 return left.Pass(); 205 } 206 207 scoped_ptr<ParseNode> Parser::Literal(Token token) { 208 return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass(); 209 } 210 211 scoped_ptr<ParseNode> Parser::Name(Token token) { 212 return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass(); 213 } 214 215 scoped_ptr<ParseNode> Parser::Group(Token token) { 216 scoped_ptr<ParseNode> expr = ParseExpression(); 217 if (has_error()) 218 return scoped_ptr<ParseNode>(); 219 Consume(Token::RIGHT_PAREN, "Expected ')'"); 220 return expr.Pass(); 221 } 222 223 scoped_ptr<ParseNode> Parser::Not(Token token) { 224 scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1); 225 if (has_error()) 226 return scoped_ptr<ParseNode>(); 227 scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode); 228 unary_op->set_op(token); 229 unary_op->set_operand(expr.Pass()); 230 return unary_op.PassAs<ParseNode>(); 231 } 232 233 scoped_ptr<ParseNode> Parser::List(Token node) { 234 scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true)); 235 if (!has_error() && !at_end()) 236 Consume(Token::RIGHT_BRACKET, "Expected ']'"); 237 return list.Pass(); 238 } 239 240 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left, 241 Token token) { 242 scoped_ptr<ParseNode> right = 243 ParseExpression(expressions_[token.type()].precedence + 1); 244 if (!right) { 245 *err_ = 246 Err(token, 247 "Expected right hand side for '" + token.value().as_string() + "'"); 248 return scoped_ptr<ParseNode>(); 249 } 250 scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode); 251 binary_op->set_op(token); 252 binary_op->set_left(left.Pass()); 253 binary_op->set_right(right.Pass()); 254 return binary_op.PassAs<ParseNode>(); 255 } 256 257 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left, 258 Token token) { 259 scoped_ptr<ListNode> list(new ListNode); 260 list->set_begin_token(token); 261 list->set_end_token(token); 262 scoped_ptr<BlockNode> block; 263 bool has_arg = false; 264 if (Match(Token::LEFT_PAREN)) { 265 // Parsing a function call. 266 has_arg = true; 267 if (Match(Token::RIGHT_PAREN)) { 268 // Nothing, just an empty call. 269 } else { 270 list = ParseList(Token::RIGHT_PAREN, false); 271 if (has_error()) 272 return scoped_ptr<ParseNode>(); 273 Consume(Token::RIGHT_PAREN, "Expected ')' after call"); 274 } 275 // Optionally with a scope. 276 if (LookAhead(Token::LEFT_BRACE)) { 277 block = ParseBlock(); 278 if (has_error()) 279 return scoped_ptr<ParseNode>(); 280 } 281 } 282 283 if (!left && !has_arg) { 284 // Not a function call, just a standalone identifier. 285 return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass(); 286 } 287 scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode); 288 func_call->set_function(token); 289 func_call->set_args(list.Pass()); 290 if (block) 291 func_call->set_block(block.Pass()); 292 return func_call.PassAs<ParseNode>(); 293 } 294 295 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left, 296 Token token) { 297 if (left->AsIdentifier() == NULL) { 298 *err_ = Err(left.get(), "Left-hand side of assignment must be identifier."); 299 return scoped_ptr<ParseNode>(); 300 } 301 scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT); 302 scoped_ptr<BinaryOpNode> assign(new BinaryOpNode); 303 assign->set_op(token); 304 assign->set_left(left.Pass()); 305 assign->set_right(value.Pass()); 306 return assign.PassAs<ParseNode>(); 307 } 308 309 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left, 310 Token token) { 311 // TODO: Maybe support more complex expressions like a[0][0]. This would 312 // require work on the evaluator too. 313 if (left->AsIdentifier() == NULL) { 314 *err_ = Err(left.get(), "May only subscript simple identifiers"); 315 return scoped_ptr<ParseNode>(); 316 } 317 scoped_ptr<ParseNode> value = ParseExpression(); 318 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript."); 319 scoped_ptr<AccessorNode> accessor(new AccessorNode); 320 accessor->set_base(left->AsIdentifier()->value()); 321 accessor->set_index(value.Pass()); 322 return accessor.PassAs<ParseNode>(); 323 } 324 325 // Does not Consume the start or end token. 326 scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before, 327 bool allow_trailing_comma) { 328 scoped_ptr<ListNode> list(new ListNode); 329 list->set_begin_token(cur_token()); 330 bool just_got_comma = false; 331 while (!LookAhead(stop_before)) { 332 just_got_comma = false; 333 // Why _OR? We're parsing things that are higher precedence than the , 334 // that separates the items of the list. , should appear lower than 335 // boolean expressions (the lowest of which is OR), but above assignments. 336 list->append_item(ParseExpression(PRECEDENCE_OR)); 337 if (has_error()) 338 return scoped_ptr<ListNode>(); 339 if (at_end()) { 340 *err_ = 341 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list."); 342 return scoped_ptr<ListNode>(); 343 } 344 just_got_comma = Match(Token::COMMA); 345 } 346 if (just_got_comma && !allow_trailing_comma) { 347 *err_ = Err(cur_token(), "Trailing comma"); 348 return scoped_ptr<ListNode>(); 349 } 350 list->set_end_token(cur_token()); 351 return list.Pass(); 352 } 353 354 scoped_ptr<ParseNode> Parser::ParseFile() { 355 scoped_ptr<BlockNode> file(new BlockNode(false)); 356 for (;;) { 357 if (at_end()) 358 break; 359 scoped_ptr<ParseNode> statement = ParseStatement(); 360 if (!statement) 361 break; 362 file->append_statement(statement.Pass()); 363 } 364 if (!at_end() && !has_error()) 365 *err_ = Err(cur_token(), "Unexpected here, should be newline."); 366 if (has_error()) 367 return scoped_ptr<ParseNode>(); 368 return file.PassAs<ParseNode>(); 369 } 370 371 scoped_ptr<ParseNode> Parser::ParseStatement() { 372 if (LookAhead(Token::LEFT_BRACE)) { 373 return ParseBlock().PassAs<ParseNode>(); 374 } else if (LookAhead(Token::IF)) { 375 return ParseCondition(); 376 } else { 377 // TODO(scottmg): Is this too strict? Just drop all the testing if we want 378 // to allow "pointless" expressions and return ParseExpression() directly. 379 scoped_ptr<ParseNode> stmt = ParseExpression(); 380 if (stmt) { 381 if (stmt->AsFunctionCall() || IsAssignment(stmt.get())) 382 return stmt.Pass(); 383 } 384 if (!has_error()) { 385 Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token(); 386 *err_ = Err(token, "Expecting assignment or function call."); 387 } 388 return scoped_ptr<ParseNode>(); 389 } 390 } 391 392 scoped_ptr<BlockNode> Parser::ParseBlock() { 393 Token begin_token = 394 Consume(Token::LEFT_BRACE, "Expected '{' to start a block."); 395 if (has_error()) 396 return scoped_ptr<BlockNode>(); 397 scoped_ptr<BlockNode> block(new BlockNode(true)); 398 block->set_begin_token(begin_token); 399 400 for (;;) { 401 if (LookAhead(Token::RIGHT_BRACE)) { 402 block->set_end_token(Consume()); 403 break; 404 } 405 406 scoped_ptr<ParseNode> statement = ParseStatement(); 407 if (!statement) 408 return scoped_ptr<BlockNode>(); 409 block->append_statement(statement.Pass()); 410 } 411 return block.Pass(); 412 } 413 414 scoped_ptr<ParseNode> Parser::ParseCondition() { 415 scoped_ptr<ConditionNode> condition(new ConditionNode); 416 Consume(Token::IF, "Expected 'if'"); 417 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'."); 418 condition->set_condition(ParseExpression()); 419 if (IsAssignment(condition->condition())) 420 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'."); 421 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'."); 422 condition->set_if_true(ParseBlock().Pass()); 423 if (Match(Token::ELSE)) 424 condition->set_if_false(ParseStatement().Pass()); 425 if (has_error()) 426 return scoped_ptr<ParseNode>(); 427 return condition.PassAs<ParseNode>(); 428 } 429