Home | History | Annotate | Download | only in gn
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "tools/gn/parser.h"
      6 
      7 #include "base/logging.h"
      8 #include "tools/gn/functions.h"
      9 #include "tools/gn/operators.h"
     10 #include "tools/gn/token.h"
     11 
     12 // grammar:
     13 //
     14 // file       := (statement)*
     15 // statement  := block | if | assignment
     16 // block      := '{' statement* '}'
     17 // if         := 'if' '(' expr ')' statement [ else ]
     18 // else       := 'else' (if | statement)*
     19 // assignment := ident {'=' | '+=' | '-='} expr
     20 
     21 enum Precedence {
     22   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
     23   PRECEDENCE_OR = 2,
     24   PRECEDENCE_AND = 3,
     25   PRECEDENCE_EQUALITY = 4,
     26   PRECEDENCE_RELATION = 5,
     27   PRECEDENCE_SUM = 6,
     28   PRECEDENCE_PREFIX = 7,
     29   PRECEDENCE_CALL = 8,
     30   PRECEDENCE_DOT = 9,         // Highest precedence.
     31 };
     32 
     33 // The top-level for blocks/ifs is recursive descent, the expression parser is
     34 // a Pratt parser. The basic idea there is to have the precedences (and
     35 // associativities) encoded relative to each other and only parse up until you
     36 // hit something of that precedence. There's a dispatch table in expressions_
     37 // at the top of parser.cc that describes how each token dispatches if it's
     38 // seen as either a prefix or infix operator, and if it's infix, what its
     39 // precedence is.
     40 //
     41 // Refs:
     42 // - http://javascript.crockford.com/tdop/tdop.html
     43 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
     44 
     45 // Indexed by Token::Type.
     46 ParserHelper Parser::expressions_[] = {
     47   {NULL, NULL, -1},                                             // INVALID
     48   {&Parser::Literal, NULL, -1},                                 // INTEGER
     49   {&Parser::Literal, NULL, -1},                                 // STRING
     50   {&Parser::Literal, NULL, -1},                                 // TRUE_TOKEN
     51   {&Parser::Literal, NULL, -1},                                 // FALSE_TOKEN
     52   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // EQUAL
     53   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // PLUS
     54   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // MINUS
     55   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // PLUS_EQUALS
     56   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // MINUS_EQUALS
     57   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // EQUAL_EQUAL
     58   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // NOT_EQUAL
     59   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_EQUAL
     60   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_EQUAL
     61   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_THAN
     62   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_THAN
     63   {NULL, &Parser::BinaryOperator, PRECEDENCE_AND},              // BOOLEAN_AND
     64   {NULL, &Parser::BinaryOperator, PRECEDENCE_OR},               // BOOLEAN_OR
     65   {&Parser::Not, NULL, -1},                                     // BANG
     66   {NULL, &Parser::DotOperator, PRECEDENCE_DOT},                 // DOT
     67   {&Parser::Group, NULL, -1},                                   // LEFT_PAREN
     68   {NULL, NULL, -1},                                             // RIGHT_PAREN
     69   {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},         // LEFT_BRACKET
     70   {NULL, NULL, -1},                                             // RIGHT_BRACKET
     71   {NULL, NULL, -1},                                             // LEFT_BRACE
     72   {NULL, NULL, -1},                                             // RIGHT_BRACE
     73   {NULL, NULL, -1},                                             // IF
     74   {NULL, NULL, -1},                                             // ELSE
     75   {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
     76   {NULL, NULL, -1},                                             // COMMA
     77   {NULL, NULL, -1},                                             // COMMENT
     78 };
     79 
     80 Parser::Parser(const std::vector<Token>& tokens, Err* err)
     81     : tokens_(tokens), err_(err), cur_(0) {
     82 }
     83 
     84 Parser::~Parser() {
     85 }
     86 
     87 // static
     88 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
     89                                     Err* err) {
     90   Parser p(tokens, err);
     91   return p.ParseFile().PassAs<ParseNode>();
     92 }
     93 
     94 // static
     95 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
     96                                               Err* err) {
     97   Parser p(tokens, err);
     98   return p.ParseExpression().Pass();
     99 }
    100 
    101 bool Parser::IsAssignment(const ParseNode* node) const {
    102   return node && node->AsBinaryOp() &&
    103          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
    104           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
    105           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
    106 }
    107 
    108 bool Parser::IsStatementBreak(Token::Type token_type) const {
    109   switch (token_type) {
    110     case Token::IDENTIFIER:
    111     case Token::LEFT_BRACE:
    112     case Token::RIGHT_BRACE:
    113     case Token::IF:
    114     case Token::ELSE:
    115       return true;
    116     default:
    117       return false;
    118   }
    119 }
    120 
    121 bool Parser::LookAhead(Token::Type type) {
    122   if (at_end())
    123     return false;
    124   return cur_token().type() == type;
    125 }
    126 
    127 bool Parser::Match(Token::Type type) {
    128   if (!LookAhead(type))
    129     return false;
    130   Consume();
    131   return true;
    132 }
    133 
    134 Token Parser::Consume(Token::Type type, const char* error_message) {
    135   Token::Type types[1] = { type };
    136   return Consume(types, 1, error_message);
    137 }
    138 
    139 Token Parser::Consume(Token::Type* types,
    140                       size_t num_types,
    141                       const char* error_message) {
    142   if (has_error()) {
    143     // Don't overwrite current error, but make progress through tokens so that
    144     // a loop that's expecting a particular token will still terminate.
    145     cur_++;
    146     return Token(Location(), Token::INVALID, base::StringPiece());
    147   }
    148   if (at_end()) {
    149     const char kEOFMsg[] = "I hit EOF instead.";
    150     if (tokens_.empty())
    151       *err_ = Err(Location(), error_message, kEOFMsg);
    152     else
    153       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    154     return Token(Location(), Token::INVALID, base::StringPiece());
    155   }
    156 
    157   for (size_t i = 0; i < num_types; ++i) {
    158     if (cur_token().type() == types[i])
    159       return tokens_[cur_++];
    160   }
    161   *err_ = Err(cur_token(), error_message);
    162   return Token(Location(), Token::INVALID, base::StringPiece());
    163 }
    164 
    165 Token Parser::Consume() {
    166   return tokens_[cur_++];
    167 }
    168 
    169 scoped_ptr<ParseNode> Parser::ParseExpression() {
    170   return ParseExpression(0);
    171 }
    172 
    173 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
    174   if (at_end())
    175     return scoped_ptr<ParseNode>();
    176 
    177   Token token = Consume();
    178   PrefixFunc prefix = expressions_[token.type()].prefix;
    179 
    180   if (prefix == NULL) {
    181     *err_ = Err(token,
    182                 std::string("Unexpected token '") + token.value().as_string() +
    183                     std::string("'"));
    184     return scoped_ptr<ParseNode>();
    185   }
    186 
    187   scoped_ptr<ParseNode> left = (this->*prefix)(token);
    188   if (has_error())
    189     return left.Pass();
    190 
    191   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
    192          precedence <= expressions_[cur_token().type()].precedence) {
    193     token = Consume();
    194     InfixFunc infix = expressions_[token.type()].infix;
    195     if (infix == NULL) {
    196       *err_ = Err(token,
    197                   std::string("Unexpected token '") +
    198                       token.value().as_string() + std::string("'"));
    199       return scoped_ptr<ParseNode>();
    200     }
    201     left = (this->*infix)(left.Pass(), token);
    202     if (has_error())
    203       return scoped_ptr<ParseNode>();
    204   }
    205 
    206   return left.Pass();
    207 }
    208 
    209 scoped_ptr<ParseNode> Parser::Literal(Token token) {
    210   return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass();
    211 }
    212 
    213 scoped_ptr<ParseNode> Parser::Name(Token token) {
    214   return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
    215 }
    216 
    217 scoped_ptr<ParseNode> Parser::Group(Token token) {
    218   scoped_ptr<ParseNode> expr = ParseExpression();
    219   if (has_error())
    220     return scoped_ptr<ParseNode>();
    221   Consume(Token::RIGHT_PAREN, "Expected ')'");
    222   return expr.Pass();
    223 }
    224 
    225 scoped_ptr<ParseNode> Parser::Not(Token token) {
    226   scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
    227   if (has_error())
    228     return scoped_ptr<ParseNode>();
    229   scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
    230   unary_op->set_op(token);
    231   unary_op->set_operand(expr.Pass());
    232   return unary_op.PassAs<ParseNode>();
    233 }
    234 
    235 scoped_ptr<ParseNode> Parser::List(Token node) {
    236   scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true));
    237   if (!has_error() && !at_end())
    238     Consume(Token::RIGHT_BRACKET, "Expected ']'");
    239   return list.Pass();
    240 }
    241 
    242 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
    243                                              Token token) {
    244   scoped_ptr<ParseNode> right =
    245       ParseExpression(expressions_[token.type()].precedence + 1);
    246   if (!right) {
    247     *err_ =
    248         Err(token,
    249             "Expected right hand side for '" + token.value().as_string() + "'");
    250     return scoped_ptr<ParseNode>();
    251   }
    252   scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
    253   binary_op->set_op(token);
    254   binary_op->set_left(left.Pass());
    255   binary_op->set_right(right.Pass());
    256   return binary_op.PassAs<ParseNode>();
    257 }
    258 
    259 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
    260                                                Token token) {
    261   scoped_ptr<ListNode> list(new ListNode);
    262   list->set_begin_token(token);
    263   list->set_end_token(token);
    264   scoped_ptr<BlockNode> block;
    265   bool has_arg = false;
    266   if (Match(Token::LEFT_PAREN)) {
    267     // Parsing a function call.
    268     has_arg = true;
    269     if (Match(Token::RIGHT_PAREN)) {
    270       // Nothing, just an empty call.
    271     } else {
    272       list = ParseList(Token::RIGHT_PAREN, false);
    273       if (has_error())
    274         return scoped_ptr<ParseNode>();
    275       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    276     }
    277     // Optionally with a scope.
    278     if (LookAhead(Token::LEFT_BRACE)) {
    279       block = ParseBlock();
    280       if (has_error())
    281         return scoped_ptr<ParseNode>();
    282     }
    283   }
    284 
    285   if (!left && !has_arg) {
    286     // Not a function call, just a standalone identifier.
    287     return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
    288   }
    289   scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
    290   func_call->set_function(token);
    291   func_call->set_args(list.Pass());
    292   if (block)
    293     func_call->set_block(block.Pass());
    294   return func_call.PassAs<ParseNode>();
    295 }
    296 
    297 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
    298                                          Token token) {
    299   if (left->AsIdentifier() == NULL) {
    300     *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
    301     return scoped_ptr<ParseNode>();
    302   }
    303   scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
    304   scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
    305   assign->set_op(token);
    306   assign->set_left(left.Pass());
    307   assign->set_right(value.Pass());
    308   return assign.PassAs<ParseNode>();
    309 }
    310 
    311 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
    312                                         Token token) {
    313   // TODO: Maybe support more complex expressions like a[0][0]. This would
    314   // require work on the evaluator too.
    315   if (left->AsIdentifier() == NULL) {
    316     *err_ = Err(left.get(), "May only subscript identifiers.",
    317         "The thing on the left hand side of the [] must be an identifier\n"
    318         "and not an expression. If you need this, you'll have to assign the\n"
    319         "value to a temporary before subscripting. Sorry.");
    320     return scoped_ptr<ParseNode>();
    321   }
    322   scoped_ptr<ParseNode> value = ParseExpression();
    323   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
    324   scoped_ptr<AccessorNode> accessor(new AccessorNode);
    325   accessor->set_base(left->AsIdentifier()->value());
    326   accessor->set_index(value.Pass());
    327   return accessor.PassAs<ParseNode>();
    328 }
    329 
    330 scoped_ptr<ParseNode> Parser::DotOperator(scoped_ptr<ParseNode> left,
    331                                           Token token) {
    332   if (left->AsIdentifier() == NULL) {
    333     *err_ = Err(left.get(), "May only use \".\" for identifiers.",
    334         "The thing on the left hand side of the dot must be an identifier\n"
    335         "and not an expression. If you need this, you'll have to assign the\n"
    336         "value to a temporary first. Sorry.");
    337     return scoped_ptr<ParseNode>();
    338   }
    339 
    340   scoped_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
    341   if (!right || !right->AsIdentifier()) {
    342     *err_ = Err(token, "Expected identifier for right-hand-side of \".\"",
    343         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
    344     return scoped_ptr<ParseNode>();
    345   }
    346 
    347   scoped_ptr<AccessorNode> accessor(new AccessorNode);
    348   accessor->set_base(left->AsIdentifier()->value());
    349   accessor->set_member(scoped_ptr<IdentifierNode>(
    350       static_cast<IdentifierNode*>(right.release())));
    351   return accessor.PassAs<ParseNode>();
    352 }
    353 
    354 // Does not Consume the start or end token.
    355 scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before,
    356                                        bool allow_trailing_comma) {
    357   scoped_ptr<ListNode> list(new ListNode);
    358   list->set_begin_token(cur_token());
    359   bool just_got_comma = false;
    360   bool first_time = true;
    361   while (!LookAhead(stop_before)) {
    362     if (!first_time) {
    363       if (!just_got_comma) {
    364         // Require commas separate things in lists.
    365         *err_ = Err(cur_token(), "Expected comma between items.");
    366         return scoped_ptr<ListNode>();
    367       }
    368     }
    369     first_time = false;
    370 
    371     // Why _OR? We're parsing things that are higher precedence than the ,
    372     // that separates the items of the list. , should appear lower than
    373     // boolean expressions (the lowest of which is OR), but above assignments.
    374     list->append_item(ParseExpression(PRECEDENCE_OR));
    375     if (has_error())
    376       return scoped_ptr<ListNode>();
    377     if (at_end()) {
    378       *err_ =
    379           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
    380       return scoped_ptr<ListNode>();
    381     }
    382     just_got_comma = Match(Token::COMMA);
    383   }
    384   if (just_got_comma && !allow_trailing_comma) {
    385     *err_ = Err(cur_token(), "Trailing comma");
    386     return scoped_ptr<ListNode>();
    387   }
    388   list->set_end_token(cur_token());
    389   return list.Pass();
    390 }
    391 
    392 scoped_ptr<ParseNode> Parser::ParseFile() {
    393   scoped_ptr<BlockNode> file(new BlockNode(false));
    394   for (;;) {
    395     if (at_end())
    396       break;
    397     scoped_ptr<ParseNode> statement = ParseStatement();
    398     if (!statement)
    399       break;
    400     file->append_statement(statement.Pass());
    401   }
    402   if (!at_end() && !has_error())
    403     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
    404   if (has_error())
    405     return scoped_ptr<ParseNode>();
    406   return file.PassAs<ParseNode>();
    407 }
    408 
    409 scoped_ptr<ParseNode> Parser::ParseStatement() {
    410   if (LookAhead(Token::LEFT_BRACE)) {
    411     return ParseBlock().PassAs<ParseNode>();
    412   } else if (LookAhead(Token::IF)) {
    413     return ParseCondition();
    414   } else {
    415     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    416     // to allow "pointless" expressions and return ParseExpression() directly.
    417     scoped_ptr<ParseNode> stmt = ParseExpression();
    418     if (stmt) {
    419       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
    420         return stmt.Pass();
    421     }
    422     if (!has_error()) {
    423       Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
    424       *err_ = Err(token, "Expecting assignment or function call.");
    425     }
    426     return scoped_ptr<ParseNode>();
    427   }
    428 }
    429 
    430 scoped_ptr<BlockNode> Parser::ParseBlock() {
    431   Token begin_token =
    432       Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
    433   if (has_error())
    434     return scoped_ptr<BlockNode>();
    435   scoped_ptr<BlockNode> block(new BlockNode(true));
    436   block->set_begin_token(begin_token);
    437 
    438   for (;;) {
    439     if (LookAhead(Token::RIGHT_BRACE)) {
    440       block->set_end_token(Consume());
    441       break;
    442     }
    443 
    444     scoped_ptr<ParseNode> statement = ParseStatement();
    445     if (!statement)
    446       return scoped_ptr<BlockNode>();
    447     block->append_statement(statement.Pass());
    448   }
    449   return block.Pass();
    450 }
    451 
    452 scoped_ptr<ParseNode> Parser::ParseCondition() {
    453   scoped_ptr<ConditionNode> condition(new ConditionNode);
    454   Consume(Token::IF, "Expected 'if'");
    455   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
    456   condition->set_condition(ParseExpression());
    457   if (IsAssignment(condition->condition()))
    458     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
    459   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
    460   condition->set_if_true(ParseBlock().Pass());
    461   if (Match(Token::ELSE))
    462     condition->set_if_false(ParseStatement().Pass());
    463   if (has_error())
    464     return scoped_ptr<ParseNode>();
    465   return condition.PassAs<ParseNode>();
    466 }
    467