Home | History | Annotate | Download | only in gn
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "tools/gn/parser.h"
      6 
      7 #include "base/logging.h"
      8 #include "tools/gn/functions.h"
      9 #include "tools/gn/operators.h"
     10 #include "tools/gn/token.h"
     11 
     12 // grammar:
     13 //
     14 // file       := (statement)*
     15 // statement  := block | if | assignment
     16 // block      := '{' statement* '}'
     17 // if         := 'if' '(' expr ')' statement [ else ]
     18 // else       := 'else' (if | statement)*
     19 // assignment := ident {'=' | '+=' | '-='} expr
     20 
     21 enum Precedence {
     22   PRECEDENCE_ASSIGNMENT = 1,
     23   PRECEDENCE_OR = 2,
     24   PRECEDENCE_AND = 3,
     25   PRECEDENCE_EQUALITY = 4,
     26   PRECEDENCE_RELATION = 5,
     27   PRECEDENCE_SUM = 6,
     28   PRECEDENCE_PREFIX = 7,
     29   PRECEDENCE_CALL = 8,
     30 };
     31 
     32 // The top-level for blocks/ifs is still recursive descent, the expression
     33 // parser is a Pratt parser. The basic idea there is to have the precedences
     34 // (and associativities) encoded relative to each other and only parse up
     35 // until you hit something of that precedence. There's a dispatch table in
     36 // expressions_ at the top of parser.cc that describes how each token
     37 // dispatches if it's seen as either a prefix or infix operator, and if it's
     38 // infix, what its precedence is.
     39 //
     40 // Refs:
     41 // - http://javascript.crockford.com/tdop/tdop.html
     42 // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
     43 
     44 // Indexed by Token::Type.
     45 ParserHelper Parser::expressions_[] = {
     46   {NULL, NULL, -1},                                             // INVALID
     47   {&Parser::Literal, NULL, -1},                                 // INTEGER
     48   {&Parser::Literal, NULL, -1},                                 // STRING
     49   {&Parser::Literal, NULL, -1},                                 // TRUE_TOKEN
     50   {&Parser::Literal, NULL, -1},                                 // FALSE_TOKEN
     51   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // EQUAL
     52   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // PLUS
     53   {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM},              // MINUS
     54   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // PLUS_EQUALS
     55   {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},           // MINUS_EQUALS
     56   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // EQUAL_EQUAL
     57   {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},         // NOT_EQUAL
     58   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_EQUAL
     59   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_EQUAL
     60   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // LESS_THAN
     61   {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION},         // GREATER_THAN
     62   {NULL, &Parser::BinaryOperator, PRECEDENCE_AND},              // BOOLEAN_AND
     63   {NULL, &Parser::BinaryOperator, PRECEDENCE_OR},               // BOOLEAN_OR
     64   {&Parser::Not, NULL, -1},                                     // BANG
     65   {&Parser::Group, NULL, -1},                                   // LEFT_PAREN
     66   {NULL, NULL, -1},                                             // RIGHT_PAREN
     67   {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},         // LEFT_BRACKET
     68   {NULL, NULL, -1},                                             // RIGHT_BRACKET
     69   {NULL, NULL, -1},                                             // LEFT_BRACE
     70   {NULL, NULL, -1},                                             // RIGHT_BRACE
     71   {NULL, NULL, -1},                                             // IF
     72   {NULL, NULL, -1},                                             // ELSE
     73   {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
     74   {NULL, NULL, -1},                                             // COMMA
     75   {NULL, NULL, -1},                                             // COMMENT
     76 };
     77 
     78 Parser::Parser(const std::vector<Token>& tokens, Err* err)
     79     : tokens_(tokens), err_(err), cur_(0) {
     80 }
     81 
     82 Parser::~Parser() {
     83 }
     84 
     85 // static
     86 scoped_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
     87                                     Err* err) {
     88   Parser p(tokens, err);
     89   return p.ParseFile().PassAs<ParseNode>();
     90 }
     91 
     92 // static
     93 scoped_ptr<ParseNode> Parser::ParseExpression(const std::vector<Token>& tokens,
     94                                               Err* err) {
     95   Parser p(tokens, err);
     96   return p.ParseExpression().Pass();
     97 }
     98 
     99 bool Parser::IsAssignment(const ParseNode* node) const {
    100   return node && node->AsBinaryOp() &&
    101          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
    102           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
    103           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
    104 }
    105 
    106 bool Parser::IsStatementBreak(Token::Type token_type) const {
    107   switch (token_type) {
    108     case Token::IDENTIFIER:
    109     case Token::LEFT_BRACE:
    110     case Token::RIGHT_BRACE:
    111     case Token::IF:
    112     case Token::ELSE:
    113       return true;
    114     default:
    115       return false;
    116   }
    117 }
    118 
    119 bool Parser::LookAhead(Token::Type type) {
    120   if (at_end())
    121     return false;
    122   return cur_token().type() == type;
    123 }
    124 
    125 bool Parser::Match(Token::Type type) {
    126   if (!LookAhead(type))
    127     return false;
    128   Consume();
    129   return true;
    130 }
    131 
    132 Token Parser::Consume(Token::Type type, const char* error_message) {
    133   Token::Type types[1] = { type };
    134   return Consume(types, 1, error_message);
    135 }
    136 
    137 Token Parser::Consume(Token::Type* types,
    138                       size_t num_types,
    139                       const char* error_message) {
    140   if (has_error()) {
    141     // Don't overwrite current error, but make progress through tokens so that
    142     // a loop that's expecting a particular token will still terminate.
    143     cur_++;
    144     return Token(Location(), Token::INVALID, base::StringPiece());
    145   }
    146   if (at_end()) {
    147     const char kEOFMsg[] = "I hit EOF instead.";
    148     if (tokens_.empty())
    149       *err_ = Err(Location(), error_message, kEOFMsg);
    150     else
    151       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
    152     return Token(Location(), Token::INVALID, base::StringPiece());
    153   }
    154 
    155   for (size_t i = 0; i < num_types; ++i) {
    156     if (cur_token().type() == types[i])
    157       return tokens_[cur_++];
    158   }
    159   *err_ = Err(cur_token(), error_message);
    160   return Token(Location(), Token::INVALID, base::StringPiece());
    161 }
    162 
    163 Token Parser::Consume() {
    164   return tokens_[cur_++];
    165 }
    166 
    167 scoped_ptr<ParseNode> Parser::ParseExpression() {
    168   return ParseExpression(0);
    169 }
    170 
    171 scoped_ptr<ParseNode> Parser::ParseExpression(int precedence) {
    172   if (at_end())
    173     return scoped_ptr<ParseNode>();
    174 
    175   Token token = Consume();
    176   PrefixFunc prefix = expressions_[token.type()].prefix;
    177 
    178   if (prefix == NULL) {
    179     *err_ = Err(token,
    180                 std::string("Unexpected token '") + token.value().as_string() +
    181                     std::string("'"));
    182     return scoped_ptr<ParseNode>();
    183   }
    184 
    185   scoped_ptr<ParseNode> left = (this->*prefix)(token);
    186   if (has_error())
    187     return left.Pass();
    188 
    189   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
    190          precedence <= expressions_[cur_token().type()].precedence) {
    191     token = Consume();
    192     InfixFunc infix = expressions_[token.type()].infix;
    193     if (infix == NULL) {
    194       *err_ = Err(token,
    195                   std::string("Unexpected token '") +
    196                       token.value().as_string() + std::string("'"));
    197       return scoped_ptr<ParseNode>();
    198     }
    199     left = (this->*infix)(left.Pass(), token);
    200     if (has_error())
    201       return scoped_ptr<ParseNode>();
    202   }
    203 
    204   return left.Pass();
    205 }
    206 
    207 scoped_ptr<ParseNode> Parser::Literal(Token token) {
    208   return scoped_ptr<ParseNode>(new LiteralNode(token)).Pass();
    209 }
    210 
    211 scoped_ptr<ParseNode> Parser::Name(Token token) {
    212   return IdentifierOrCall(scoped_ptr<ParseNode>(), token).Pass();
    213 }
    214 
    215 scoped_ptr<ParseNode> Parser::Group(Token token) {
    216   scoped_ptr<ParseNode> expr = ParseExpression();
    217   if (has_error())
    218     return scoped_ptr<ParseNode>();
    219   Consume(Token::RIGHT_PAREN, "Expected ')'");
    220   return expr.Pass();
    221 }
    222 
    223 scoped_ptr<ParseNode> Parser::Not(Token token) {
    224   scoped_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
    225   if (has_error())
    226     return scoped_ptr<ParseNode>();
    227   scoped_ptr<UnaryOpNode> unary_op(new UnaryOpNode);
    228   unary_op->set_op(token);
    229   unary_op->set_operand(expr.Pass());
    230   return unary_op.PassAs<ParseNode>();
    231 }
    232 
    233 scoped_ptr<ParseNode> Parser::List(Token node) {
    234   scoped_ptr<ParseNode> list(ParseList(Token::RIGHT_BRACKET, true));
    235   if (!has_error() && !at_end())
    236     Consume(Token::RIGHT_BRACKET, "Expected ']'");
    237   return list.Pass();
    238 }
    239 
    240 scoped_ptr<ParseNode> Parser::BinaryOperator(scoped_ptr<ParseNode> left,
    241                                              Token token) {
    242   scoped_ptr<ParseNode> right =
    243       ParseExpression(expressions_[token.type()].precedence + 1);
    244   if (!right) {
    245     *err_ =
    246         Err(token,
    247             "Expected right hand side for '" + token.value().as_string() + "'");
    248     return scoped_ptr<ParseNode>();
    249   }
    250   scoped_ptr<BinaryOpNode> binary_op(new BinaryOpNode);
    251   binary_op->set_op(token);
    252   binary_op->set_left(left.Pass());
    253   binary_op->set_right(right.Pass());
    254   return binary_op.PassAs<ParseNode>();
    255 }
    256 
    257 scoped_ptr<ParseNode> Parser::IdentifierOrCall(scoped_ptr<ParseNode> left,
    258                                                Token token) {
    259   scoped_ptr<ListNode> list(new ListNode);
    260   list->set_begin_token(token);
    261   list->set_end_token(token);
    262   scoped_ptr<BlockNode> block;
    263   bool has_arg = false;
    264   if (Match(Token::LEFT_PAREN)) {
    265     // Parsing a function call.
    266     has_arg = true;
    267     if (Match(Token::RIGHT_PAREN)) {
    268       // Nothing, just an empty call.
    269     } else {
    270       list = ParseList(Token::RIGHT_PAREN, false);
    271       if (has_error())
    272         return scoped_ptr<ParseNode>();
    273       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
    274     }
    275     // Optionally with a scope.
    276     if (LookAhead(Token::LEFT_BRACE)) {
    277       block = ParseBlock();
    278       if (has_error())
    279         return scoped_ptr<ParseNode>();
    280     }
    281   }
    282 
    283   if (!left && !has_arg) {
    284     // Not a function call, just a standalone identifier.
    285     return scoped_ptr<ParseNode>(new IdentifierNode(token)).Pass();
    286   }
    287   scoped_ptr<FunctionCallNode> func_call(new FunctionCallNode);
    288   func_call->set_function(token);
    289   func_call->set_args(list.Pass());
    290   if (block)
    291     func_call->set_block(block.Pass());
    292   return func_call.PassAs<ParseNode>();
    293 }
    294 
    295 scoped_ptr<ParseNode> Parser::Assignment(scoped_ptr<ParseNode> left,
    296                                          Token token) {
    297   if (left->AsIdentifier() == NULL) {
    298     *err_ = Err(left.get(), "Left-hand side of assignment must be identifier.");
    299     return scoped_ptr<ParseNode>();
    300   }
    301   scoped_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
    302   scoped_ptr<BinaryOpNode> assign(new BinaryOpNode);
    303   assign->set_op(token);
    304   assign->set_left(left.Pass());
    305   assign->set_right(value.Pass());
    306   return assign.PassAs<ParseNode>();
    307 }
    308 
    309 scoped_ptr<ParseNode> Parser::Subscript(scoped_ptr<ParseNode> left,
    310                                         Token token) {
    311   // TODO: Maybe support more complex expressions like a[0][0]. This would
    312   // require work on the evaluator too.
    313   if (left->AsIdentifier() == NULL) {
    314     *err_ = Err(left.get(), "May only subscript simple identifiers");
    315     return scoped_ptr<ParseNode>();
    316   }
    317   scoped_ptr<ParseNode> value = ParseExpression();
    318   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
    319   scoped_ptr<AccessorNode> accessor(new AccessorNode);
    320   accessor->set_base(left->AsIdentifier()->value());
    321   accessor->set_index(value.Pass());
    322   return accessor.PassAs<ParseNode>();
    323 }
    324 
    325 // Does not Consume the start or end token.
    326 scoped_ptr<ListNode> Parser::ParseList(Token::Type stop_before,
    327                                        bool allow_trailing_comma) {
    328   scoped_ptr<ListNode> list(new ListNode);
    329   list->set_begin_token(cur_token());
    330   bool just_got_comma = false;
    331   while (!LookAhead(stop_before)) {
    332     just_got_comma = false;
    333     // Why _OR? We're parsing things that are higher precedence than the ,
    334     // that separates the items of the list. , should appear lower than
    335     // boolean expressions (the lowest of which is OR), but above assignments.
    336     list->append_item(ParseExpression(PRECEDENCE_OR));
    337     if (has_error())
    338       return scoped_ptr<ListNode>();
    339     if (at_end()) {
    340       *err_ =
    341           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
    342       return scoped_ptr<ListNode>();
    343     }
    344     just_got_comma = Match(Token::COMMA);
    345   }
    346   if (just_got_comma && !allow_trailing_comma) {
    347     *err_ = Err(cur_token(), "Trailing comma");
    348     return scoped_ptr<ListNode>();
    349   }
    350   list->set_end_token(cur_token());
    351   return list.Pass();
    352 }
    353 
    354 scoped_ptr<ParseNode> Parser::ParseFile() {
    355   scoped_ptr<BlockNode> file(new BlockNode(false));
    356   for (;;) {
    357     if (at_end())
    358       break;
    359     scoped_ptr<ParseNode> statement = ParseStatement();
    360     if (!statement)
    361       break;
    362     file->append_statement(statement.Pass());
    363   }
    364   if (!at_end() && !has_error())
    365     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
    366   if (has_error())
    367     return scoped_ptr<ParseNode>();
    368   return file.PassAs<ParseNode>();
    369 }
    370 
    371 scoped_ptr<ParseNode> Parser::ParseStatement() {
    372   if (LookAhead(Token::LEFT_BRACE)) {
    373     return ParseBlock().PassAs<ParseNode>();
    374   } else if (LookAhead(Token::IF)) {
    375     return ParseCondition();
    376   } else {
    377     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
    378     // to allow "pointless" expressions and return ParseExpression() directly.
    379     scoped_ptr<ParseNode> stmt = ParseExpression();
    380     if (stmt) {
    381       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
    382         return stmt.Pass();
    383     }
    384     if (!has_error()) {
    385       Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token();
    386       *err_ = Err(token, "Expecting assignment or function call.");
    387     }
    388     return scoped_ptr<ParseNode>();
    389   }
    390 }
    391 
    392 scoped_ptr<BlockNode> Parser::ParseBlock() {
    393   Token begin_token =
    394       Consume(Token::LEFT_BRACE, "Expected '{' to start a block.");
    395   if (has_error())
    396     return scoped_ptr<BlockNode>();
    397   scoped_ptr<BlockNode> block(new BlockNode(true));
    398   block->set_begin_token(begin_token);
    399 
    400   for (;;) {
    401     if (LookAhead(Token::RIGHT_BRACE)) {
    402       block->set_end_token(Consume());
    403       break;
    404     }
    405 
    406     scoped_ptr<ParseNode> statement = ParseStatement();
    407     if (!statement)
    408       return scoped_ptr<BlockNode>();
    409     block->append_statement(statement.Pass());
    410   }
    411   return block.Pass();
    412 }
    413 
    414 scoped_ptr<ParseNode> Parser::ParseCondition() {
    415   scoped_ptr<ConditionNode> condition(new ConditionNode);
    416   Consume(Token::IF, "Expected 'if'");
    417   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
    418   condition->set_condition(ParseExpression());
    419   if (IsAssignment(condition->condition()))
    420     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
    421   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
    422   condition->set_if_true(ParseBlock().Pass());
    423   if (Match(Token::ELSE))
    424     condition->set_if_false(ParseStatement().Pass());
    425   if (has_error())
    426     return scoped_ptr<ParseNode>();
    427   return condition.PassAs<ParseNode>();
    428 }
    429