# Home | History | Annotate | Download | only in sourcedr
      1 #!/usr/bin/env python3
      2 
      3 """Ninja File Parser.
      4 """
      5 
      6 from __future__ import print_function
      7 
      8 import argparse
      9 import collections
     10 import os
     11 import re
     12 import struct
     13 import sys
     14 
     15 DEBUG_ALLOC = False
     16 
     17 if DEBUG_ALLOC:
     18     try:
     19         import tracemalloc
     20     except ImportError:
     21         DEBUG_ALLOC = False
     22 
     23 try:
     24     from cStringIO import StringIO  # Python 2
     25 except ImportError:
     26     from io import StringIO  # Python 3
     27 
     28 try:
     29     from sys import intern
     30 except ImportError:
     31     pass  # In Python 2, intern() is a built-in function.
     32 
if sys.version_info < (3,):
    # Wrap built-in open() function to ignore encoding in Python 2.
    # (Python 2's built-in open() has no encoding parameter; this wrapper
    # keeps the Python 3 call sites in this module working unchanged.)
    _builtin_open = open
    def open(path, mode, encoding=None):
        return _builtin_open(path, mode)

    # Replace built-in zip() function with itertools.izip so zip() is
    # lazy under Python 2 as well.
    from itertools import izip as zip
     41 
     42 
     43 class EvalEnv(dict):
     44     __slots__ = ('parent')
     45 
     46 
     47     def __init__(self, *args, **kwargs):
     48         super(EvalEnv, self).__init__(*args, **kwargs)
     49         self.parent = None
     50 
     51 
     52     def get_recursive(self, key, default=None):
     53         try:
     54             return self[key]
     55         except KeyError:
     56             if self.parent:
     57                 return self.parent.get_recursive(key, default)
     58             return default
     59 
     60 
     61 class BuildEvalEnv(EvalEnv):
     62     __slots__ = ('_build_env', '_rule_env')
     63 
     64 
     65     def __init__(self, build_env, rule_env):
     66         self._build_env = build_env
     67         self._rule_env = rule_env
     68 
     69 
     70     def get_recursive(self, key, default=None):
     71         try:
     72             return self._build_env[key]
     73         except KeyError:
     74             pass
     75 
     76         if self._rule_env:
     77             try:
     78                 return self._rule_env[key]
     79             except KeyError:
     80                 pass
     81 
     82         if self._build_env.parent:
     83             return self._build_env.parent.get_recursive(key, default)
     84         return default
     85 
     86 
     87 class EvalError(ValueError):
     88     """Exceptions for ``EvalString`` evalution errors."""
     89     pass
     90 
     91 
     92 class EvalCircularError(EvalError):
     93     """Exception for circular substitution in ``EvalString``."""
     94 
     95 
     96     def __init__(self, expanded_vars):
     97         super(EvalCircularError, self).__init__(
     98                 'circular evaluation: ' + ' -> '.join(expanded_vars))
     99 
    100 
    101 class EvalString(tuple):
    102     """Strings with variables to be substituted."""
    103 
    104 
    105     def __bool__(self):
    106         """Check whether this is an empty string."""
    107         return len(self) > 1
    108 
    109 
    110     def __nonzero__(self):
    111         """Check whether this is an empty string (Python2)."""
    112         return self.__bool__()
    113 
    114 
    115     def create_iters(self):
    116         """Create descriptors and segments iterators."""
    117         curr_iter = iter(self)
    118         descs = next(curr_iter)
    119         return zip(descs, curr_iter)
    120 
    121 
    122 def _eval_string(s, env, expanded_vars, result_buf):
    123     """Evaluate each segments in ``EvalString`` and write result to the
    124     given StringIO buffer.
    125 
    126     Args:
    127         env: A ``dict`` that maps a name to ``EvalString`` object.
    128         expanded_vars: A ``list`` that keeps the variable under evaluation.
    129         result_buf: Output buffer.
    130     """
    131     if type(s) is str:
    132         result_buf.write(s)
    133         return
    134 
    135     for desc, seg in s.create_iters():
    136         if desc == 't':
    137             # Append raw text
    138             result_buf.write(seg)
    139         else:
    140             # Substitute variables
    141             varname = seg
    142             if varname in expanded_vars:
    143                 raise EvalCircularError(expanded_vars + [varname])
    144             expanded_vars.append(varname)
    145             try:
    146                 next_es = env.get_recursive(varname)
    147                 if next_es:
    148                     _eval_string(next_es, env, expanded_vars, result_buf)
    149             finally:
    150                 expanded_vars.pop()
    151 
    152 
    153 def eval_string(s, env):
    154     """Evaluate a ``str`` or ``EvalString`` in an environment.
    155 
    156     Args:
    157         env: A ``dict`` that maps a name to an ``EvalString`` object.
    158 
    159     Returns:
    160         str: The result of evaluation.
    161 
    162     Raises:
    163         EvalNameError: Unknown variable name occurs.
    164         EvalCircularError: Circular variable substitution occurs.
    165     """
    166     expanded_vars = []
    167     result_buf = StringIO()
    168     _eval_string(s, env, expanded_vars, result_buf)
    169     return result_buf.getvalue()
    170 
    171 
    172 def eval_path_strings(strs, env):
    173     """Evalute a list of ``EvalString`` in an environment and normalize paths.
    174 
    175     Args:
    176         strs: A list of ``EvalString`` which should be treated as paths.
    177         env: A ``dict`` that maps a name to an ``EvalString`` object.
    178 
    179     Returns:
    180         The list of evaluated strings.
    181     """
    182     return [intern(os.path.normpath(eval_string(s, env))) for s in strs]
    183 
    184 
    185 class EvalStringBuilder(object):
    186     def __init__(self):
    187         self._segs = ['']
    188 
    189 
    190     def append_raw(self, text):
    191         descs = self._segs[0]
    192         if descs and descs[-1] == 't':
    193             self._segs[-1] += text
    194         else:
    195             self._segs[0] += 't'
    196             self._segs.append(text)
    197         return self
    198 
    199 
    200     def append_var(self, varname):
    201         self._segs[0] += 'v'
    202         self._segs.append(varname)
    203         return self
    204 
    205 
    206     def getvalue(self):
    207         return EvalString(intern(seg) for seg in self._segs)
    208 
    209 
    210 class Build(object):
    211     __slots__ = ('explicit_outs', 'implicit_outs', 'rule', 'explicit_ins',
    212                  'implicit_ins', 'prerequisites', 'bindings',
    213                  'depfile_implicit_ins')
    214 
    215 
    216 class Rule(object):
    217     __slots__ = ('name', 'bindings')
    218 
    219 
    220 class Pool(object):
    221     __slots__ = ('name', 'bindings')
    222 
    223 
    224 class Default(object):
    225     __slots__ = ('outs')
    226 
    227 
    228 Token = collections.namedtuple('Token', 'kind line column value')
    229 
    230 
class TK(object):
    """Token ID enumerations."""

    # Trivial tokens, matched directly by Lexer._SIMPLE_TOKEN_MATCHER.
    EOF = 0
    COMMENT = 1
    NEWLINE = 2
    SPACE = 3
    ESC_NEWLINE = 4
    IDENT = 5
    PIPE2 = 6
    PIPE = 7
    COLON = 8
    ASSIGN = 9

    # Non-trivial tokens, produced on demand by Lexer.lex_path() and
    # Lexer.lex_string(); their value is an EvalString.
    PATH = 10
    STRING = 11
    249 
    250 
    251 class TokenMatcher(object):
    252     def __init__(self, patterns):
    253         self._matcher = re.compile('|'.join('(' + p + ')' for k, p in patterns))
    254         self._kinds = [k for k, p in patterns]
    255 
    256 
    257     def match(self, buf, pos):
    258         match = self._matcher.match(buf, pos)
    259         if not match:
    260             return None
    261         return (self._kinds[match.lastindex - 1], match.start(), match.end())
    262 
    263 
    264 class ParseError(ValueError):
    265     def __init__(self, path, line, column, reason=None):
    266         self.path = path
    267         self.line = line
    268         self.column = column
    269         self.reason = reason
    270 
    271 
    272     def __repr__(self):
    273         s = 'ParseError: {}:{}:{}'.format(self.path, self.line, self.column)
    274         if self.reason:
    275             s += ': ' + self.reason
    276         return s
    277 
    278 
class Lexer(object):
    """Tokenizer for ninja-build manifest files.

    Consumes lines lazily from ``lines_iterable`` and hands out tokens
    through a one-token lookahead interface (``peek``/``lex``).  Paths
    and strings, which have ``$``-escape rules of their own, are lexed
    on demand with ``lex_path``/``lex_string``.  Reported line/column
    numbers are 1-based.
    """

    def __init__(self, lines_iterable, path='<stdin>', encoding='utf-8'):
        # path/encoding are kept for error reporting and so include
        # statements can reopen further files with the same encoding.
        self.encoding = encoding
        self.path = path

        # Current line buffer and cursor; [_line_pos, _line_end) is the
        # unconsumed part of _line_buf.  _line counts from 1.
        self._line_iter = iter(lines_iterable)
        self._line_buf = None
        self._line = 0
        self._line_pos = 0
        self._line_end = 0

        # True until a non-space token is consumed on the current line.
        # Leading spaces are significant (they introduce local binding
        # blocks), so SPACE is only skipped when _line_start is False.
        self._line_start = True

        # One-token lookahead cache filled by peek().
        self._next_token = None
        self._next_pos = None


    def raise_error(self, reason=None):
        """Raise ParseError at the current cursor position."""
        raise ParseError(self.path, self._line, self._line_pos + 1, reason)


    def _read_next_line(self):
        """Advance to the next input line; return False at end of input."""
        try:
            self._line_buf = next(self._line_iter)
            self._line_pos = 0
            self._line_end = len(self._line_buf)
            self._line += 1
            return True
        except StopIteration:
            self._line_buf = None
            return False


    def _ensure_line(self):
        """Ensure the buffer has unconsumed characters; False at EOF."""
        if self._line_buf and self._line_pos < self._line_end:
            return True
        return self._read_next_line()

    # Matches a line holding only blanks and an optional '#' comment,
    # stopping just before the terminating newline.
    _COMMENT_MATCHER = re.compile(r'[ \t]*(?:#[^\n]*)?(?=\n)')


    def _ensure_non_comment_line(self):
        """Like _ensure_line(), but advance past comment-only lines."""
        if not self._ensure_line():
            return False
        # Match comments or spaces
        match = self._COMMENT_MATCHER.match(self._line_buf)
        if not match:
            return True
        # Move the cursor to the newline character
        self._line_pos = match.end()
        return True

    _SPACE_MATCHER = re.compile(r'[ \t]+')


    def _skip_space(self):
        """Advance the cursor past blanks on the current line."""
        match = self._SPACE_MATCHER.match(self._line_buf, self._line_pos)
        if match:
            self._line_pos = match.end()

    # Trivial token alternatives, tried in order.  IDENT accepts '.' and
    # '-' because ninja identifiers may contain them.
    _SIMPLE_TOKEN_MATCHER = TokenMatcher([
        (TK.COMMENT, r'#[^\n]*'),
        (TK.NEWLINE, r'[\r\n]'),
        (TK.SPACE, r'[ \t]+'),
        (TK.ESC_NEWLINE, r'\$[\r\n]'),
        (TK.IDENT, r'[\w_.-]+'),
        (TK.PIPE2, r'\|\|'),
        (TK.PIPE, r'\|'),
        (TK.COLON, r':'),
        (TK.ASSIGN, r'='),
    ])


    def peek(self):
        """Return the next trivial token without consuming it.

        Returns None when the upcoming input does not form a trivial
        token (callers then switch to lex_path()/lex_string()), or an
        EOF token at end of input.
        """
        if self._next_token is not None:
            return self._next_token
        while True:
            if not self._ensure_non_comment_line():
                # NOTE(review): this EOF token is returned without
                # filling _next_token/_next_pos, so lex() sets _line_pos
                # to None after consuming EOF.  Looks harmless because
                # nothing reads past EOF -- worth confirming.
                return Token(TK.EOF, self._line, self._line_pos + 1, '')

            match = self._SIMPLE_TOKEN_MATCHER.match(
                    self._line_buf, self._line_pos)
            if not match:
                return None
            kind, start, end = match

            # Skip comments and spaces
            if ((kind == TK.SPACE and not self._line_start) or
                (kind == TK.ESC_NEWLINE) or
                (kind == TK.COMMENT)):
                self._line_pos = end
                continue

            # Save the peeked token
            token = Token(kind, self._line, self._line_pos + 1,
                          self._line_buf[start:end])
            self._next_token = token
            self._next_pos = end
            return token


    def lex(self):
        """Consume and return the next trivial token; error when peek()
        cannot produce one."""
        token = self.peek()
        if not token:
            self.raise_error()
        self._line_start = token.kind == TK.NEWLINE
        self._line_pos = self._next_pos
        self._next_token = None
        self._next_pos = None
        return token


    def lex_match(self, match_set):
        """Consume a token and require its kind to be in ``match_set``."""
        token = self.lex()
        if token.kind not in match_set:
            self.raise_error()
        return token


    class STR_TK(object):
        """Sub-token IDs used while lexing paths and strings."""
        END = 0
        CHARS = 1
        ESC_CHAR = 2
        ESC_NEWLINE = 3
        VAR = 4
        CURVE_VAR = 5


    # Path tokens end at blanks, '|' and ':'; string tokens end only at
    # newlines.  Both understand $-escapes and variable references.
    _PATH_TOKEN_MATCHER = TokenMatcher([
        (STR_TK.END, r'[ \t\n|:]'),
        (STR_TK.CHARS, r'[^ \t\n|:$]+'),
        (STR_TK.ESC_CHAR, r'\$[^\n{\w_-]'),
        (STR_TK.ESC_NEWLINE, r'\$\n[ \t]*'),
        (STR_TK.VAR, r'\$[\w_-]+'),
        (STR_TK.CURVE_VAR, r'\$\{[\w_.-]+\}'),
    ])


    _STR_TOKEN_MATCHER = TokenMatcher([
        (STR_TK.END, r'\n+'),
        (STR_TK.CHARS, r'[^\n$]+'),
        (STR_TK.ESC_CHAR, r'\$[^\n{\w_-]'),
        (STR_TK.ESC_NEWLINE, r'\$\n[ \t]*'),
        (STR_TK.VAR, r'\$[\w_-]+'),
        (STR_TK.CURVE_VAR, r'\$\{[\w_.-]+\}'),
    ])


    def _lex_string_or_path(self, matcher, result_kind):
        """Lex a $-escaped path or string into an EvalString token.

        Args:
            matcher: _PATH_TOKEN_MATCHER or _STR_TOKEN_MATCHER.
            result_kind: TK.PATH or TK.STRING for the produced token.
        """
        self._ensure_line()
        self._skip_space()

        start_line = self._line
        start_column = self._line_pos + 1

        builder = EvalStringBuilder()

        while True:
            if not self._ensure_line():
                break

            match = matcher.match(self._line_buf, self._line_pos)
            if not match:
                self.raise_error('unknown character sequence')

            kind, start, end = match
            if kind == self.STR_TK.END:
                break

            self._line_pos = end

            if kind == self.STR_TK.CHARS:
                builder.append_raw(self._line_buf[start:end])
            elif kind == self.STR_TK.ESC_CHAR:
                # Only '$ ', '$\t', '$:' and '$$' are accepted escapes.
                ch = self._line_buf[start + 1]
                if ch in ' \t:$':
                    builder.append_raw(ch)
                else:
                    self.raise_error('bad escape sequence')
            elif kind == self.STR_TK.ESC_NEWLINE:
                # '$' at end of line: continue on the next line, skipping
                # its leading indentation.
                if not self._read_next_line():
                    break
                self._skip_space()
            elif kind == self.STR_TK.VAR:
                builder.append_var(self._line_buf[start + 1 : end])
            else:
                assert kind == self.STR_TK.CURVE_VAR
                # Strip the surrounding '${' and '}'.
                builder.append_var(self._line_buf[start + 2 : end - 1])

        # Invalidate the lookahead: the cursor has moved past it.
        self._next_token = None
        return Token(result_kind, start_line, start_column, builder.getvalue())


    def lex_path(self):
        """Lex a path token (terminated by blanks, '|', or ':')."""
        return self._lex_string_or_path(self._PATH_TOKEN_MATCHER, TK.PATH)


    def lex_string(self):
        """Lex a string token (terminated by a newline)."""
        return self._lex_string_or_path(self._STR_TOKEN_MATCHER, TK.STRING)
    478 
    479 
    480 Manifest = collections.namedtuple('Manifest', 'builds rules pools defaults')
    481 
    482 
class Parser(object):
    """Ninja Manifest Parser

    This parser parses ninja-build manifest files, such as::

        cflags = -Wall

        pool cc_pool
          depth = 1

        rule cc
          command = gcc -c -o $out $in $cflags $extra_cflags
          pool = cc_pool

        build test.o : cc test.c
          extra_cflags = -Werror

        default test.o

    Example:
        >>> manifest = Parser().parse('build.ninja', 'utf-8')
        >>> print(manifest.builds)

    """


    def __init__(self, base_dir=None):
        # base_dir: directory against which manifest paths are resolved;
        # defaults to the current working directory.
        if base_dir is None:
            self._base_dir = os.getcwd()
        else:
            self._base_dir = base_dir

        # File context: stack of suspended (lexer, env) pairs while
        # nested include/subninja files are being parsed.
        self._context = []
        self._lexer = None
        self._env = None

        # Intermediate results
        self._builds = []
        self._rules = []
        self._pools = []
        self._defaults = []

        # Rule name -> Rule object, for resolving rule references in
        # build statements.
        self._rules_dict = {}


    def _push_context(self, lexer, env):
        """Push a parsing file context.

        Args:
            lexer: Lexer for the associated file.
            env: Environment for global variable bindings.
        """

        self._context.append((self._lexer, self._env))
        self._lexer = lexer
        self._env = env


    def _pop_context(self):
        """Pop the current file context and restore the enclosing one."""

        current_context = (self._lexer, self._env)
        self._lexer, self._env = self._context.pop()
        return current_context


    def parse(self, path, encoding, depfile=None):
        """Parse a ninja-build manifest file.

        Args:
            path (str): Input file path to be parsed.
            encoding (str): Input file encoding.
            depfile (str): Optional ``.ninja_deps`` log whose records are
                attached to the parsed build statements.

        Returns:
            Manifest: Parsed manifest for the given ninja-build manifest file.
        """

        self._parse_internal(path, encoding, EvalEnv())
        if depfile:
            self.parse_dep_file(depfile, encoding)
        return Manifest(self._builds, self._rules, self._pools, self._defaults)


    def _parse_internal(self, path, encoding, env):
        """Parse one manifest file (possibly included) in scope ``env``."""
        path = os.path.join(self._base_dir, path)
        with open(path, 'r', encoding=encoding) as fp:
            self._push_context(Lexer(fp, path, encoding), env)
            try:
                self._parse_all_top_level_stmts()
            finally:
                self._pop_context()


    def _parse_all_top_level_stmts(self):
        """Parse all top-level statements in a file."""
        while self._parse_top_level_stmt():
            pass


    def _parse_top_level_stmt(self):
        """Parse a top level statement.

        Returns:
            bool: False once EOF is reached, True otherwise.
        """

        token = self._lexer.peek()
        if not token:
            # An unexpected non-trivial token occurs.  Raise an error.
            self._lexer.raise_error()

        if token.kind == TK.EOF:
            return False
        elif token.kind == TK.NEWLINE:
            self._lexer.lex()
        elif token.kind == TK.IDENT:
            ident = token.value
            if ident == 'rule':
                self._parse_rule_stmt()
            elif ident == 'build':
                self._parse_build_stmt()
            elif ident == 'default':
                self._parse_default_stmt()
            elif ident == 'pool':
                self._parse_pool_stmt()
            elif ident in {'subninja', 'include'}:
                self._parse_include_stmt()
            else:
                # Any other identifier starts a global variable binding.
                self._parse_global_binding_stmt()
        else:
            # An unexpected trivial token occurs.  Raise an error.
            self._lexer.raise_error()
        return True


    def _parse_path_list(self, end_set):
        """Parse a list of paths ended by a token kind in ``end_set``."""

        result = []
        while True:
            token = self._lexer.peek()
            if token:
                if token.kind in end_set:
                    break
                elif token.kind != TK.IDENT:
                    self._lexer.raise_error()

            # peek() returned None (non-trivial input) or IDENT; either
            # way the upcoming characters form a path.
            token = self._lexer.lex_path()
            result.append(token.value)
        return result


    def _parse_binding_stmt(self):
        """Parse a variable binding statement.

        Example:
            IDENT = STRING

        Returns:
            (key, value): Variable name and its unevaluated EvalString.
        """
        key = self._lexer.lex_match({TK.IDENT}).value
        self._lexer.lex_match({TK.ASSIGN})
        token = self._lexer.lex_string()
        value = token.value
        self._lexer.lex_match({TK.NEWLINE, TK.EOF})
        return (key, value)


    def _parse_global_binding_stmt(self):
        """Parse a global variable binding statement.

        Example:
            IDENT = STRING
        """

        key, value = self._parse_binding_stmt()
        # Global bindings are evaluated eagerly, in declaration order.
        value = eval_string(value, self._env)
        self._env[key] = value


    def _parse_local_binding_block(self):
        """Parse several local variable bindings.

        Example:
            SPACE IDENT1 = STRING1
            SPACE IDENT2 = STRING2
        """
        result = EvalEnv()
        while True:
            token = self._lexer.peek()
            if not token or token.kind != TK.SPACE:
                break
            self._lexer.lex()
            key, value = self._parse_binding_stmt()
            result[key] = value
        return result


    def _parse_build_stmt(self):
        """Parse `build` statement.

        Example:
            build PATH1 PATH2 | PATH3 PATH4 : IDENT PATH5 PATH6 | $
                  PATH7 PATH8 || PATH9 PATH10
            SPACE IDENT1 = STRING1
            SPACE IDENT2 = STRING2
        """

        token = self._lexer.lex_match({TK.IDENT})
        assert token.value == 'build'

        build = Build()

        # Parse explicit outs
        explicit_outs = self._parse_path_list({TK.PIPE, TK.COLON})

        # Parse implicit outs
        token = self._lexer.peek()
        # NOTE(review): peek() can return None for unexpected input,
        # which would raise AttributeError here (and at the two similar
        # peek() checks below) instead of a ParseError.
        if token.kind == TK.PIPE:
            self._lexer.lex()
            implicit_outs = self._parse_path_list({TK.COLON})
        else:
            implicit_outs = tuple()

        self._lexer.lex_match({TK.COLON})

        # Parse rule name for this build statement
        build.rule = self._lexer.lex_match({TK.IDENT}).value
        try:
            rule_env = self._rules_dict[build.rule].bindings
        except KeyError:
            # 'phony' is the only rule name allowed without a prior
            # declaration; it has no bindings of its own, so the current
            # scope stands in.
            if build.rule != 'phony':
                self._lexer.raise_error('undeclared rule name')
            rule_env = self._env

        # Parse explicit ins
        explicit_ins = self._parse_path_list(
                {TK.PIPE, TK.PIPE2, TK.NEWLINE, TK.EOF})

        # Parse implicit ins
        token = self._lexer.peek()
        if token.kind == TK.PIPE:
            self._lexer.lex()
            implicit_ins = self._parse_path_list({TK.PIPE2, TK.NEWLINE, TK.EOF})
        else:
            implicit_ins = tuple()

        # Parse order-only prerequisites
        token = self._lexer.peek()
        if token.kind == TK.PIPE2:
            self._lexer.lex()
            prerequisites = self._parse_path_list({TK.NEWLINE, TK.EOF})
        else:
            prerequisites = tuple()

        self._lexer.lex_match({TK.NEWLINE, TK.EOF})

        # Parse local bindings
        bindings = self._parse_local_binding_block()
        bindings.parent = self._env
        if bindings:
            build.bindings = bindings
        else:
            # Don't keep the empty ``dict`` object if there are no bindings
            build.bindings = None

        # Evaluate all paths
        env = BuildEvalEnv(bindings, rule_env)

        build.explicit_outs = eval_path_strings(explicit_outs, env)
        build.implicit_outs = eval_path_strings(implicit_outs, env)
        build.explicit_ins = eval_path_strings(explicit_ins, env)
        build.implicit_ins = eval_path_strings(implicit_ins, env)
        build.prerequisites = eval_path_strings(prerequisites, env)
        # Filled in later by parse_dep_file() when a deps log is given.
        build.depfile_implicit_ins = tuple()

        self._builds.append(build)


    def _parse_rule_stmt(self):
        """Parse a `rule` statement.

        Example:
            rule IDENT
            SPACE IDENT1 = STRING1
            SPACE IDENT2 = STRING2
        """

        token = self._lexer.lex_match({TK.IDENT})
        assert token.value == 'rule'

        rule = Rule()
        rule.name = self._lexer.lex_match({TK.IDENT}).value
        self._lexer.lex_match({TK.NEWLINE, TK.EOF})
        # Rule bindings stay unevaluated; they are expanded per build
        # statement through BuildEvalEnv.
        rule.bindings = self._parse_local_binding_block()

        self._rules.append(rule)
        self._rules_dict[rule.name] = rule


    def _parse_default_stmt(self):
        """Parse a `default` statement.

        Example:
            default PATH1 PATH2 PATH3
        """

        token = self._lexer.lex_match({TK.IDENT})
        assert token.value == 'default'

        default = Default()
        outs = self._parse_path_list({TK.NEWLINE, TK.EOF})
        default.outs = eval_path_strings(outs, self._env)

        self._lexer.lex_match({TK.NEWLINE, TK.EOF})

        self._defaults.append(default)


    def _parse_pool_stmt(self):
        """Parse a `pool` statement.

        Example:
            pool IDENT
            SPACE IDENT1 = STRING1
            SPACE IDENT2 = STRING2
        """
        token = self._lexer.lex_match({TK.IDENT})
        assert token.value == 'pool'

        pool = Pool()

        token = self._lexer.lex()
        assert token.kind == TK.IDENT
        pool.name = token.value

        self._lexer.lex_match({TK.NEWLINE, TK.EOF})

        pool.bindings = self._parse_local_binding_block()

        self._pools.append(pool)


    def _parse_include_stmt(self):
        """Parse an `include` or `subninja` statement.

        Example:
            include PATH
            subninja PATH
        """

        token = self._lexer.lex_match({TK.IDENT})
        assert token.value in {'include', 'subninja'}
        # `subninja` gets an isolated child scope; `include` shares ours.
        wrap_env = token.value == 'subninja'

        token = self._lexer.lex_path()
        path = eval_string(token.value, self._env)  # XXX: Check lookup order
        self._lexer.lex_match({TK.NEWLINE, TK.EOF})

        if wrap_env:
            env = EvalEnv()
            env.parent = self._env
        else:
            env = self._env
        self._parse_internal(path, self._lexer.encoding, env)


    def parse_dep_file(self, path, encoding):
        """Attach ``.ninja_deps`` records to the parsed build statements.

        Args:
            path (str): Deps log file path.
            encoding (str): Encoding used to decode path names in the log.
        """
        depfile = DepFileParser().parse(path, encoding)
        for build in self._builds:
            depfile_implicit_ins = set()
            for explicit_out in build.explicit_outs:
                deps = depfile.get(explicit_out)
                if deps:
                    depfile_implicit_ins.update(deps.implicit_ins)
            build.depfile_implicit_ins = tuple(sorted(depfile_implicit_ins))
    854 
    855 
    856 class DepFileError(ValueError):
    857     pass
    858 
    859 
    860 class DepFileRecord(object):
    861     __slots__ = ('id', 'explicit_out', 'mtime', 'implicit_ins')
    862 
    863 
    864     def __init__(self, id, explicit_out, mtime, implicit_ins):
    865         self.id = id
    866         self.explicit_out = explicit_out
    867         self.mtime = mtime
    868         self.implicit_ins = implicit_ins
    869 
    870 
    871 class DepFileParser(object):
    872     """Ninja deps log parser which parses ``.ninja_deps`` file.
    873     """
    874 
    875 
    def __init__(self):
        # Records in file order, the id -> path table, and the
        # output-path -> record map (all filled during _parse()).
        self._deps = []
        self._paths = []
        self._path_deps = {}
    880 
    881 
    def parse(self, path, encoding):
        """Parse the binary deps log at ``path``; path names stored in
        the log are decoded with ``encoding``."""
        with open(path, 'rb') as stream:
            return self._parse(stream, encoding)
    885 
    886 
    @staticmethod
    def _unpack_uint32(buf):
        """Decode a 4-byte little-endian unsigned integer."""
        (value,) = struct.unpack('<I', buf)
        return value
    890 
    891 
    @staticmethod
    def _unpack_uint32_iter(buf):
        """Yield successive little-endian uint32 values from ``buf``."""
        for (value,) in struct.iter_unpack('<I', buf):
            yield value
    896 
    897 
    # Python 2 keeps deps-log paths as byte strings; Python 3 must decode
    # them.  Both variants strip at most three trailing NUL bytes, which
    # pad each path record (the count=3 cap matches the code below;
    # presumably this is the deps-log 4-byte alignment padding -- confirm
    # against the ninja deps log format).
    if sys.version_info < (3,):
        @staticmethod
        def _extract_path(s, encoding):
            """Strip trailing NUL padding and intern the path (bytes kept)."""
            pos = len(s)
            count = 3
            while count > 0 and pos > 0 and s[pos - 1] == b'\0':
                pos -= 1
                count -= 1
            return intern(s[0:pos])
    else:
        @staticmethod
        def _extract_path(s, encoding):
            """Strip trailing NUL padding, decode, and intern the path."""
            pos = len(s)
            count = 3
            while count > 0 and pos > 0 and s[pos - 1] == 0:
                pos -= 1
                count -= 1
            return intern(s[0:pos].decode(encoding))
    916 
    917 
    def _get_path(self, index):
        """Return the path registered at ``index`` in the path table.

        Raises:
            DepFileError: ``index`` is past the end of the table.
        """
        try:
            return self._paths[index]
        except IndexError:
            raise DepFileError('path index overflow')
    923 
    924 
    925     def _parse(self, fp, encoding):
    926         # Check the magic word
    927         if fp.readline() != b'# ninjadeps\n':
    928             raise DepFileError('bad magic word')
    929 
    930         # Check the file format version
    931         version = self._unpack_uint32(fp.read(4))
    932         if version != 3:
    933             raise DepFileError('unsupported deps log version: ' + str(version))
    934 
    935         # Read the records
    936         MAX_RECORD_SIZE = (1 << 19) - 1
    937         while True:
    938             buf = fp.read(4)
    939             if not buf:
    940                 break
    941 
    942             record_size = self._unpack_uint32(buf)
    943             is_dep = bool(record_size >> 31)
    944             record_size &= (1 << 31) - 1
    945 
    946             if record_size > MAX_RECORD_SIZE:
    947                 raise DepFileError('record size overflow')
    948 
    949             if is_dep:
    950                 if record_size % 4 != 0 or record_size < 8:
    951                     raise DepFileError('corrupted deps record')
    952 
    953                 buf = fp.read(record_size)
    954 
    955                 dep_iter = self._unpack_uint32_iter(buf)
    956 
    957                 idx = len(self._deps)
    958                 explicit_out = self._get_path(next(dep_iter))
    959                 mtime = next(dep_iter)
    960                 implicit_ins = [self._get_path(p) for p in dep_iter]
    961 
    962                 deps = DepFileRecord(idx, explicit_out, mtime, implicit_ins)
    963 
    964                 old_deps = self._path_deps.get(explicit_out)
    965                 if not old_deps:
    966                     self._deps.append(deps)
    967                     self._path_deps[explicit_out] = deps
    968                 elif old_deps.mtime > deps.mtime:
    969                     self._deps.append(None)
    970                 else:
    971                     self._deps[old_deps.id] = None
    972                     self._deps.append(deps)
    973                     self._path_deps[explicit_out] = deps
    974             else:
    975                 if record_size < 4:
    976                     raise DepFileError('corrupted path record')
    977                 buf = fp.read(record_size - 4)
    978                 path = self._extract_path(buf, encoding)
    979                 buf = fp.read(4)
    980                 checksum = 0xffffffff ^ self._unpack_uint32(buf)
    981                 if len(self._paths) != checksum:
    982                     raise DepFileError('bad path record checksum')
    983                 self._paths.append(path)
    984 
    985         return self._path_deps
    986 
    987 
    988 def main():
    989     # Parse command line options
    990     parser = argparse.ArgumentParser()
    991     parser.add_argument('input_file', help='input ninja file')
    992     parser.add_argument('--encoding', default='utf-8',
    993                         help='ninja file encoding')
    994     parser.add_argument('--ninja-deps', help='.ninja_deps file')
    995     args = parser.parse_args()
    996 
    997     if DEBUG_ALLOC:
    998         tracemalloc.start(25)
    999         tc_start = tracemalloc.take_snapshot()
   1000 
   1001     # Parse ninja file
   1002     manifest = Parser().parse(args.input_file, args.encoding, args.ninja_deps)
   1003 
   1004     if DEBUG_ALLOC:
   1005         tc_end = tracemalloc.take_snapshot()
   1006 
   1007     for rule in manifest.rules:
   1008         print('rule', rule.name)
   1009 
   1010     for build in manifest.builds:
   1011         print('build')
   1012         for path in build.explicit_outs:
   1013             print('  explicit_out:', path)
   1014         for path in build.implicit_outs:
   1015             print('  implicit_out:', path)
   1016         for path in build.explicit_ins:
   1017             print('  explicit_in:', path)
   1018         for path in build.implicit_ins:
   1019             print('  implicit_in:', path)
   1020         for path in build.prerequisites:
   1021             print('  prerequisites:', path)
   1022         for path in build.depfile_implicit_ins:
   1023             print('  depfile_implicit_in:', path)
   1024 
   1025     for pool in manifest.pools:
   1026         print('pool', pool.name)
   1027 
   1028     for default in manifest.defaults:
   1029         print('default')
   1030         for path in default.outs:
   1031             print('  out:', path)
   1032 
   1033     if DEBUG_ALLOC:
   1034         top_stats = tc_end.compare_to(tc_start, 'traceback')
   1035         with open('tracemalloc.log', 'w') as fp:
   1036             for s in top_stats:
   1037                 print('', file=fp)
   1038                 print('========================================', file=fp)
   1039                 print(s, file=fp)
   1040                 for line in s.traceback.format():
   1041                     print(line, file=fp)
   1042 
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
   1045