Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python2
      2 #
      3 # Copyright (C) 2014 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #   http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 
     18 # Checker is a testing tool which compiles a given test file and compares the
     19 # state of the control-flow graph before and after each optimization pass
     20 # against a set of assertions specified alongside the tests.
     21 #
     22 # Tests are written in Java, turned into DEX and compiled with the Optimizing
     23 # compiler. "Check lines" are assertions formatted as comments of the Java file.
     24 # They begin with prefix 'CHECK' followed by a pattern that the engine attempts
     25 # to match in the compiler-generated output.
     26 #
     27 # Assertions are tested in groups which correspond to the individual compiler
     28 # passes. Each group of check lines therefore must start with a 'CHECK-START'
     29 # header which specifies the output group it should be tested against. The group
     30 # name must exactly match one of the groups recognized in the output (they can
     31 # be listed with the '--list-groups' command-line flag).
     32 #
     33 # Matching of check lines is carried out in the order of appearance in the
     34 # source file. There are three types of check lines:
     35 #  - CHECK:     Must match an output line which appears in the output group
     36 #               later than lines matched against any preceding checks. Output
     37 #               lines must therefore match the check lines in the same order.
     38 #               These are referred to as "in-order" checks in the code.
     39 #  - CHECK-DAG: Must match an output line which appears in the output group
     40 #               later than lines matched against any preceding in-order checks.
     41 #               In other words, the order of output lines does not matter
     42 #               between consecutive DAG checks.
     43 #  - CHECK-NOT: Must not match any output line which appears in the output group
     44 #               later than lines matched against any preceding checks and
     45 #               earlier than lines matched against any subsequent checks.
     46 #               Surrounding non-negative checks (or boundaries of the group)
     47 #               therefore create a scope within which the assertion is verified.
     48 #
     49 # Check-line patterns are treated as plain text rather than regular expressions
     50 # but are whitespace agnostic.
     51 #
     52 # Actual regex patterns can be inserted enclosed in '{{' and '}}' brackets. If
     53 # curly brackets need to be used inside the body of the regex, they need to be
     54 # enclosed in round brackets. For example, the pattern '{{foo{2}}}' will parse
     55 # the invalid regex 'foo{2', but '{{(fo{2})}}' will match 'foo'.
     56 #
     57 # Regex patterns can be named and referenced later. A new variable is defined
     58 # with '[[name:regex]]' and can be referenced with '[[name]]'. Variables are
     59 # only valid within the scope of the defining group. Within a group they cannot
     60 # be redefined or used undefined.
     61 #
     62 # Example:
     63 #   The following assertions can be placed in a Java source file:
     64 #
     65 #   // CHECK-START: int MyClass.MyMethod() constant_folding (after)
     66 #   // CHECK:         [[ID:i[0-9]+]] IntConstant {{11|22}}
     67 #   // CHECK:                        Return [ [[ID]] ]
     68 #
     69 #   The engine will attempt to match the check lines against the output of the
     70 #   group named on the first line. Together they verify that the CFG after
     71 #   constant folding returns an integer constant with value either 11 or 22.
     72 #
     73 
     74 from __future__ import print_function
     75 import argparse
     76 import os
     77 import re
     78 import shutil
     79 import sys
     80 import tempfile
     81 
     82 class Logger(object):
     83 
     84   class Level(object):
     85     NoOutput, Error, Info = range(3)
     86 
     87   class Color(object):
     88     Default, Blue, Gray, Purple, Red = range(5)
     89 
     90     @staticmethod
     91     def terminalCode(color, out=sys.stdout):
     92       if not out.isatty():
     93         return ''
     94       elif color == Logger.Color.Blue:
     95         return '\033[94m'
     96       elif color == Logger.Color.Gray:
     97         return '\033[37m'
     98       elif color == Logger.Color.Purple:
     99         return '\033[95m'
    100       elif color == Logger.Color.Red:
    101         return '\033[91m'
    102       else:
    103         return '\033[0m'
    104 
    105   Verbosity = Level.Info
    106 
    107   @staticmethod
    108   def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout):
    109     if level <= Logger.Verbosity:
    110       text = Logger.Color.terminalCode(color, out) + text + \
    111              Logger.Color.terminalCode(Logger.Color.Default, out)
    112       if newLine:
    113         print(text, file=out)
    114       else:
    115         print(text, end="", file=out)
    116       out.flush()
    117 
    118   @staticmethod
    119   def fail(msg, file=None, line=-1):
    120     location = ""
    121     if file:
    122       location += file + ":"
    123     if line > 0:
    124       location += str(line) + ":"
    125     if location:
    126       location += " "
    127 
    128     Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
    129     Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
    130     Logger.log(msg, Logger.Level.Error, out=sys.stderr)
    131     sys.exit(msg)
    132 
    133   @staticmethod
    134   def startTest(name):
    135     Logger.log("TEST ", color=Logger.Color.Purple, newLine=False)
    136     Logger.log(name + "... ", newLine=False)
    137 
    138   @staticmethod
    139   def testPassed():
    140     Logger.log("PASS", color=Logger.Color.Blue)
    141 
    142   @staticmethod
    143   def testFailed(msg, file=None, line=-1):
    144     Logger.log("FAIL", color=Logger.Color.Red)
    145     Logger.fail(msg, file, line)
    146 
    147 class CommonEqualityMixin:
    148   """Mixin for class equality as equality of the fields."""
    149   def __eq__(self, other):
    150     return (isinstance(other, self.__class__)
    151            and self.__dict__ == other.__dict__)
    152 
    153   def __ne__(self, other):
    154     return not self.__eq__(other)
    155 
    156   def __repr__(self):
    157     return "<%s: %s>" % (type(self).__name__, str(self.__dict__))
    158 
    159 
    160 class CheckElement(CommonEqualityMixin):
    161   """Single element of the check line."""
    162 
    163   class Variant(object):
    164     """Supported language constructs."""
    165     Text, Pattern, VarRef, VarDef, Separator = range(5)
    166 
    167   rStartOptional = r"("
    168   rEndOptional = r")?"
    169 
    170   rName = r"([a-zA-Z][a-zA-Z0-9]*)"
    171   rRegex = r"(.+?)"
    172   rPatternStartSym = r"(\{\{)"
    173   rPatternEndSym = r"(\}\})"
    174   rVariableStartSym = r"(\[\[)"
    175   rVariableEndSym = r"(\]\])"
    176   rVariableSeparator = r"(:)"
    177 
    178   regexPattern = rPatternStartSym + rRegex + rPatternEndSym
    179   regexVariable = rVariableStartSym + \
    180                     rName + \
    181                     (rStartOptional + rVariableSeparator + rRegex + rEndOptional) + \
    182                   rVariableEndSym
    183 
    184   def __init__(self, variant, name, pattern):
    185     self.variant = variant
    186     self.name = name
    187     self.pattern = pattern
    188 
    189   @staticmethod
    190   def newSeparator():
    191     return CheckElement(CheckElement.Variant.Separator, None, None)
    192 
    193   @staticmethod
    194   def parseText(text):
    195     return CheckElement(CheckElement.Variant.Text, None, re.escape(text))
    196 
    197   @staticmethod
    198   def parsePattern(patternElem):
    199     return CheckElement(CheckElement.Variant.Pattern, None, patternElem[2:-2])
    200 
    201   @staticmethod
    202   def parseVariable(varElem):
    203     colonPos = varElem.find(":")
    204     if colonPos == -1:
    205       # Variable reference
    206       name = varElem[2:-2]
    207       return CheckElement(CheckElement.Variant.VarRef, name, None)
    208     else:
    209       # Variable definition
    210       name = varElem[2:colonPos]
    211       body = varElem[colonPos+1:-2]
    212       return CheckElement(CheckElement.Variant.VarDef, name, body)
    213 
    214 class CheckLine(CommonEqualityMixin):
    215   """Representation of a single assertion in the check file formed of one or
    216      more regex elements. Matching against an output line is successful only
    217      if all regex elements can be matched in the given order."""
    218 
    219   class Variant(object):
    220     """Supported types of assertions."""
    221     InOrder, DAG, Not = range(3)
    222 
    223   def __init__(self, content, variant=Variant.InOrder, fileName=None, lineNo=-1):
    224     self.fileName = fileName
    225     self.lineNo = lineNo
    226     self.content = content.strip()
    227 
    228     self.variant = variant
    229     self.lineParts = self.__parse(self.content)
    230     if not self.lineParts:
    231       Logger.fail("Empty check line", self.fileName, self.lineNo)
    232 
    233     if self.variant == CheckLine.Variant.Not:
    234       for elem in self.lineParts:
    235         if elem.variant == CheckElement.Variant.VarDef:
    236           Logger.fail("CHECK-NOT lines cannot define variables", self.fileName, self.lineNo)
    237 
    238   def __eq__(self, other):
    239     return (isinstance(other, self.__class__) and
    240             self.variant == other.variant and
    241             self.lineParts == other.lineParts)
    242 
    243   # Returns True if the given Match object was at the beginning of the line.
    244   def __isMatchAtStart(self, match):
    245     return (match is not None) and (match.start() == 0)
    246 
    247   # Takes in a list of Match objects and returns the minimal start point among
    248   # them. If there aren't any successful matches it returns the length of
    249   # the searched string.
    250   def __firstMatch(self, matches, string):
    251     starts = map(lambda m: len(string) if m is None else m.start(), matches)
    252     return min(starts)
    253 
    254   # This method parses the content of a check line stripped of the initial
    255   # comment symbol and the CHECK keyword.
    256   def __parse(self, line):
    257     lineParts = []
    258     # Loop as long as there is something to parse.
    259     while line:
    260       # Search for the nearest occurrence of the special markers.
    261       matchWhitespace = re.search(r"\s+", line)
    262       matchPattern = re.search(CheckElement.regexPattern, line)
    263       matchVariable = re.search(CheckElement.regexVariable, line)
    264 
    265       # If one of the above was identified at the current position, extract them
    266       # from the line, parse them and add to the list of line parts.
    267       if self.__isMatchAtStart(matchWhitespace):
    268         # A whitespace in the check line creates a new separator of line parts.
    269         # This allows for ignored output between the previous and next parts.
    270         line = line[matchWhitespace.end():]
    271         lineParts.append(CheckElement.newSeparator())
    272       elif self.__isMatchAtStart(matchPattern):
    273         pattern = line[0:matchPattern.end()]
    274         line = line[matchPattern.end():]
    275         lineParts.append(CheckElement.parsePattern(pattern))
    276       elif self.__isMatchAtStart(matchVariable):
    277         var = line[0:matchVariable.end()]
    278         line = line[matchVariable.end():]
    279         lineParts.append(CheckElement.parseVariable(var))
    280       else:
    281         # If we're not currently looking at a special marker, this is a plain
    282         # text match all the way until the first special marker (or the end
    283         # of the line).
    284         firstMatch = self.__firstMatch([ matchWhitespace, matchPattern, matchVariable ], line)
    285         text = line[0:firstMatch]
    286         line = line[firstMatch:]
    287         lineParts.append(CheckElement.parseText(text))
    288     return lineParts
    289 
    290   # Returns the regex pattern to be matched in the output line. Variable
    291   # references are substituted with their current values provided in the
    292   # 'varState' argument.
    293   # An exception is raised if a referenced variable is undefined.
    294   def __generatePattern(self, linePart, varState):
    295     if linePart.variant == CheckElement.Variant.VarRef:
    296       try:
    297         return re.escape(varState[linePart.name])
    298       except KeyError:
    299         Logger.testFailed("Use of undefined variable \"" + linePart.name + "\"",
    300                           self.fileName, self.lineNo)
    301     else:
    302       return linePart.pattern
    303 
    304   def __isSeparated(self, outputLine, matchStart):
    305     return (matchStart == 0) or (outputLine[matchStart - 1:matchStart].isspace())
    306 
    307   # Attempts to match the check line against a line from the output file with
    308   # the given initial variable values. It returns the new variable state if
    309   # successful and None otherwise.
    310   def match(self, outputLine, initialVarState):
    311     # Do the full matching on a shadow copy of the variable state. If the
    312     # matching fails half-way, we will not need to revert the state.
    313     varState = dict(initialVarState)
    314 
    315     matchStart = 0
    316     isAfterSeparator = True
    317 
    318     # Now try to parse all of the parts of the check line in the right order.
    319     # Variable values are updated on-the-fly, meaning that a variable can
    320     # be referenced immediately after its definition.
    321     for part in self.lineParts:
    322       if part.variant == CheckElement.Variant.Separator:
    323         isAfterSeparator = True
    324         continue
    325 
    326       # Find the earliest match for this line part.
    327       pattern = self.__generatePattern(part, varState)
    328       while True:
    329         match = re.search(pattern, outputLine[matchStart:])
    330         if (match is None) or (not isAfterSeparator and not self.__isMatchAtStart(match)):
    331           return None
    332         matchEnd = matchStart + match.end()
    333         matchStart += match.start()
    334 
    335         # Check if this is a valid match if we expect a whitespace separator
    336         # before the matched text. Otherwise loop and look for another match.
    337         if not isAfterSeparator or self.__isSeparated(outputLine, matchStart):
    338           break
    339         else:
    340           matchStart += 1
    341 
    342       if part.variant == CheckElement.Variant.VarDef:
    343         if part.name in varState:
    344           Logger.testFailed("Multiple definitions of variable \"" + part.name + "\"",
    345                             self.fileName, self.lineNo)
    346         varState[part.name] = outputLine[matchStart:matchEnd]
    347 
    348       matchStart = matchEnd
    349       isAfterSeparator = False
    350 
    351     # All parts were successfully matched. Return the new variable state.
    352     return varState
    353 
    354 
    355 class CheckGroup(CommonEqualityMixin):
    356   """Represents a named collection of check lines which are to be matched
    357      against an output group of the same name."""
    358 
    359   def __init__(self, name, lines, fileName=None, lineNo=-1):
    360     self.fileName = fileName
    361     self.lineNo = lineNo
    362 
    363     if not name:
    364       Logger.fail("Check group does not have a name", self.fileName, self.lineNo)
    365     if not lines:
    366       Logger.fail("Check group does not have a body", self.fileName, self.lineNo)
    367 
    368     self.name = name
    369     self.lines = lines
    370 
    371   def __eq__(self, other):
    372     return (isinstance(other, self.__class__) and
    373             self.name == other.name and
    374             self.lines == other.lines)
    375 
    376   def __headAndTail(self, list):
    377     return list[0], list[1:]
    378 
    379   # Splits a list of check lines at index 'i' such that lines[i] is the first
    380   # element whose variant is not equal to the given parameter.
    381   def __splitByVariant(self, lines, variant):
    382     i = 0
    383     while i < len(lines) and lines[i].variant == variant:
    384       i += 1
    385     return lines[:i], lines[i:]
    386 
    387   # Extracts the first sequence of check lines which are independent of each
    388   # other's match location, i.e. either consecutive DAG lines or a single
    389   # InOrder line. Any Not lines preceeding this sequence are also extracted.
    390   def __nextIndependentChecks(self, checkLines):
    391     notChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.Not)
    392     if not checkLines:
    393       return notChecks, [], []
    394 
    395     head, tail = self.__headAndTail(checkLines)
    396     if head.variant == CheckLine.Variant.InOrder:
    397       return notChecks, [head], tail
    398     else:
    399       assert head.variant == CheckLine.Variant.DAG
    400       independentChecks, checkLines = self.__splitByVariant(checkLines, CheckLine.Variant.DAG)
    401       return notChecks, independentChecks, checkLines
    402 
    403   # If successful, returns the line number of the first output line matching the
    404   # check line and the updated variable state. Otherwise returns -1 and None,
    405   # respectively. The 'lineFilter' parameter can be used to supply a list of
    406   # line numbers (counting from 1) which should be skipped.
    407   def __findFirstMatch(self, checkLine, outputLines, startLineNo, lineFilter, varState):
    408     matchLineNo = startLineNo
    409     for outputLine in outputLines:
    410       if matchLineNo not in lineFilter:
    411         newVarState = checkLine.match(outputLine, varState)
    412         if newVarState is not None:
    413           return matchLineNo, newVarState
    414       matchLineNo += 1
    415     return -1, None
    416 
    417   # Matches the given positive check lines against the output in order of
    418   # appearance. Variable state is propagated but the scope of the search remains
    419   # the same for all checks. Each output line can only be matched once.
    420   # If all check lines are matched, the resulting variable state is returned
    421   # together with the remaining output. The function also returns output lines
    422   # which appear before either of the matched lines so they can be tested
    423   # against Not checks.
    424   def __matchIndependentChecks(self, checkLines, outputLines, startLineNo, varState):
    425     # If no checks are provided, skip over the entire output.
    426     if not checkLines:
    427       return outputLines, [], startLineNo + len(outputLines), varState
    428 
    429     # Keep track of which lines have been matched.
    430     matchedLines = []
    431 
    432     # Find first unused output line which matches each check line.
    433     for checkLine in checkLines:
    434       matchLineNo, varState = \
    435         self.__findFirstMatch(checkLine, outputLines, startLineNo, matchedLines, varState)
    436       if varState is None:
    437         Logger.testFailed("Could not match check line \"" + checkLine.content + "\" " +
    438                           "starting from output line " + str(startLineNo),
    439                           self.fileName, checkLine.lineNo)
    440       matchedLines.append(matchLineNo)
    441 
    442     # Return new variable state and the output lines which lie outside the
    443     # match locations of this independent group.
    444     minMatchLineNo = min(matchedLines)
    445     maxMatchLineNo = max(matchedLines)
    446     preceedingLines = outputLines[:minMatchLineNo - startLineNo]
    447     remainingLines = outputLines[maxMatchLineNo - startLineNo + 1:]
    448     return preceedingLines, remainingLines, maxMatchLineNo + 1, varState
    449 
    450   # Makes sure that the given check lines do not match any of the given output
    451   # lines. Variable state does not change.
    452   def __matchNotLines(self, checkLines, outputLines, startLineNo, varState):
    453     for checkLine in checkLines:
    454       assert checkLine.variant == CheckLine.Variant.Not
    455       matchLineNo, matchVarState = \
    456         self.__findFirstMatch(checkLine, outputLines, startLineNo, [], varState)
    457       if matchVarState is not None:
    458         Logger.testFailed("CHECK-NOT line \"" + checkLine.content + "\" matches output line " + \
    459                           str(matchLineNo), self.fileName, checkLine.lineNo)
    460 
    461   # Matches the check lines in this group against an output group. It is
    462   # responsible for running the checks in the right order and scope, and
    463   # for propagating the variable state between the check lines.
    464   def match(self, outputGroup):
    465     varState = {}
    466     checkLines = self.lines
    467     outputLines = outputGroup.body
    468     startLineNo = outputGroup.lineNo
    469 
    470     while checkLines:
    471       # Extract the next sequence of location-independent checks to be matched.
    472       notChecks, independentChecks, checkLines = self.__nextIndependentChecks(checkLines)
    473 
    474       # Match the independent checks.
    475       notOutput, outputLines, newStartLineNo, newVarState = \
    476         self.__matchIndependentChecks(independentChecks, outputLines, startLineNo, varState)
    477 
    478       # Run the Not checks against the output lines which lie between the last
    479       # two independent groups or the bounds of the output.
    480       self.__matchNotLines(notChecks, notOutput, startLineNo, varState)
    481 
    482       # Update variable state.
    483       startLineNo = newStartLineNo
    484       varState = newVarState
    485 
    486 class OutputGroup(CommonEqualityMixin):
    487   """Represents a named part of the test output against which a check group of
    488      the same name is to be matched."""
    489 
    490   def __init__(self, name, body, fileName=None, lineNo=-1):
    491     if not name:
    492       Logger.fail("Output group does not have a name", fileName, lineNo)
    493     if not body:
    494       Logger.fail("Output group does not have a body", fileName, lineNo)
    495 
    496     self.name = name
    497     self.body = body
    498     self.lineNo = lineNo
    499 
    500   def __eq__(self, other):
    501     return (isinstance(other, self.__class__) and
    502             self.name == other.name and
    503             self.body == other.body)
    504 
    505 
    506 class FileSplitMixin(object):
    507   """Mixin for representing text files which need to be split into smaller
    508      chunks before being parsed."""
    509 
    510   def _parseStream(self, stream):
    511     lineNo = 0
    512     allGroups = []
    513     currentGroup = None
    514 
    515     for line in stream:
    516       lineNo += 1
    517       line = line.strip()
    518       if not line:
    519         continue
    520 
    521       # Let the child class process the line and return information about it.
    522       # The _processLine method can modify the content of the line (or delete it
    523       # entirely) and specify whether it starts a new group.
    524       processedLine, newGroupName = self._processLine(line, lineNo)
    525       if newGroupName is not None:
    526         currentGroup = (newGroupName, [], lineNo)
    527         allGroups.append(currentGroup)
    528       if processedLine is not None:
    529         if currentGroup is not None:
    530           currentGroup[1].append(processedLine)
    531         else:
    532           self._exceptionLineOutsideGroup(line, lineNo)
    533 
    534     # Finally, take the generated line groups and let the child class process
    535     # each one before storing the final outcome.
    536     return list(map(lambda group: self._processGroup(group[0], group[1], group[2]), allGroups))
    537 
    538 
    539 class CheckFile(FileSplitMixin):
    540   """Collection of check groups extracted from the input test file."""
    541 
    542   def __init__(self, prefix, checkStream, fileName=None):
    543     self.fileName = fileName
    544     self.prefix = prefix
    545     self.groups = self._parseStream(checkStream)
    546 
    547   # Attempts to parse a check line. The regex searches for a comment symbol
    548   # followed by the CHECK keyword, given attribute and a colon at the very
    549   # beginning of the line. Whitespaces are ignored.
    550   def _extractLine(self, prefix, line):
    551     rIgnoreWhitespace = r"\s*"
    552     rCommentSymbols = [r"//", r"#"]
    553     regexPrefix = rIgnoreWhitespace + \
    554                   r"(" + r"|".join(rCommentSymbols) + r")" + \
    555                   rIgnoreWhitespace + \
    556                   prefix + r":"
    557 
    558     # The 'match' function succeeds only if the pattern is matched at the
    559     # beginning of the line.
    560     match = re.match(regexPrefix, line)
    561     if match is not None:
    562       return line[match.end():].strip()
    563     else:
    564       return None
    565 
    566   # This function is invoked on each line of the check file and returns a pair
    567   # which instructs the parser how the line should be handled. If the line is to
    568   # be included in the current check group, it is returned in the first value.
    569   # If the line starts a new check group, the name of the group is returned in
    570   # the second value.
    571   def _processLine(self, line, lineNo):
    572     # Lines beginning with 'CHECK-START' start a new check group.
    573     startLine = self._extractLine(self.prefix + "-START", line)
    574     if startLine is not None:
    575       return None, startLine
    576 
    577     # Lines starting only with 'CHECK' are matched in order.
    578     plainLine = self._extractLine(self.prefix, line)
    579     if plainLine is not None:
    580       return (plainLine, CheckLine.Variant.InOrder, lineNo), None
    581 
    582     # 'CHECK-DAG' lines are no-order assertions.
    583     dagLine = self._extractLine(self.prefix + "-DAG", line)
    584     if dagLine is not None:
    585       return (dagLine, CheckLine.Variant.DAG, lineNo), None
    586 
    587     # 'CHECK-NOT' lines are no-order negative assertions.
    588     notLine = self._extractLine(self.prefix + "-NOT", line)
    589     if notLine is not None:
    590       return (notLine, CheckLine.Variant.Not, lineNo), None
    591 
    592     # Other lines are ignored.
    593     return None, None
    594 
    595   def _exceptionLineOutsideGroup(self, line, lineNo):
    596     Logger.fail("Check line not inside a group", self.fileName, lineNo)
    597 
    598   # Constructs a check group from the parser-collected check lines.
    599   def _processGroup(self, name, lines, lineNo):
    600     checkLines = list(map(lambda line: CheckLine(line[0], line[1], self.fileName, line[2]), lines))
    601     return CheckGroup(name, checkLines, self.fileName, lineNo)
    602 
    603   def match(self, outputFile):
    604     for checkGroup in self.groups:
    605       # TODO: Currently does not handle multiple occurrences of the same group
    606       # name, e.g. when a pass is run multiple times. It will always try to
    607       # match a check group against the first output group of the same name.
    608       outputGroup = outputFile.findGroup(checkGroup.name)
    609       if outputGroup is None:
    610         Logger.fail("Group \"" + checkGroup.name + "\" not found in the output",
    611                     self.fileName, checkGroup.lineNo)
    612       Logger.startTest(checkGroup.name)
    613       checkGroup.match(outputGroup)
    614       Logger.testPassed()
    615 
    616 
    617 class OutputFile(FileSplitMixin):
    618   """Representation of the output generated by the test and split into groups
    619      within which the checks are performed.
    620 
    621      C1visualizer format is parsed with a state machine which differentiates
    622      between the 'compilation' and 'cfg' blocks. The former marks the beginning
    623      of a method. It is parsed for the method's name but otherwise ignored. Each
    624      subsequent CFG block represents one stage of the compilation pipeline and
    625      is parsed into an output group named "<method name> <pass name>".
    626      """
    627 
    628   class ParsingState:
    629     OutsideBlock, InsideCompilationBlock, StartingCfgBlock, InsideCfgBlock = range(4)
    630 
    631   def __init__(self, outputStream, fileName=None):
    632     self.fileName = fileName
    633 
    634     # Initialize the state machine
    635     self.lastMethodName = None
    636     self.state = OutputFile.ParsingState.OutsideBlock
    637     self.groups = self._parseStream(outputStream)
    638 
    639   # This function is invoked on each line of the output file and returns a pair
    640   # which instructs the parser how the line should be handled. If the line is to
    641   # be included in the current group, it is returned in the first value. If the
    642   # line starts a new output group, the name of the group is returned in the
    643   # second value.
    644   def _processLine(self, line, lineNo):
    645     if self.state == OutputFile.ParsingState.StartingCfgBlock:
    646       # Previous line started a new 'cfg' block which means that this one must
    647       # contain the name of the pass (this is enforced by C1visualizer).
    648       if re.match("name\s+\"[^\"]+\"", line):
    649         # Extract the pass name, prepend it with the name of the method and
    650         # return as the beginning of a new group.
    651         self.state = OutputFile.ParsingState.InsideCfgBlock
    652         return (None, self.lastMethodName + " " + line.split("\"")[1])
    653       else:
    654         Logger.fail("Expected output group name", self.fileName, lineNo)
    655 
    656     elif self.state == OutputFile.ParsingState.InsideCfgBlock:
    657       if line == "end_cfg":
    658         self.state = OutputFile.ParsingState.OutsideBlock
    659         return (None, None)
    660       else:
    661         return (line, None)
    662 
    663     elif self.state == OutputFile.ParsingState.InsideCompilationBlock:
    664       # Search for the method's name. Format: method "<name>"
    665       if re.match("method\s+\"[^\"]*\"", line):
    666         methodName = line.split("\"")[1].strip()
    667         if not methodName:
    668           Logger.fail("Empty method name in output", self.fileName, lineNo)
    669         self.lastMethodName = methodName
    670       elif line == "end_compilation":
    671         self.state = OutputFile.ParsingState.OutsideBlock
    672       return (None, None)
    673 
    674     else:
    675       assert self.state == OutputFile.ParsingState.OutsideBlock
    676       if line == "begin_cfg":
    677         # The line starts a new group but we'll wait until the next line from
    678         # which we can extract the name of the pass.
    679         if self.lastMethodName is None:
    680           Logger.fail("Expected method header", self.fileName, lineNo)
    681         self.state = OutputFile.ParsingState.StartingCfgBlock
    682         return (None, None)
    683       elif line == "begin_compilation":
    684         self.state = OutputFile.ParsingState.InsideCompilationBlock
    685         return (None, None)
    686       else:
    687         Logger.fail("Output line not inside a group", self.fileName, lineNo)
    688 
    689   # Constructs an output group from the parser-collected output lines.
    690   def _processGroup(self, name, lines, lineNo):
    691     return OutputGroup(name, lines, self.fileName, lineNo + 1)
    692 
    693   def findGroup(self, name):
    694     for group in self.groups:
    695       if group.name == name:
    696         return group
    697     return None
    698 
    699 
    700 def ParseArguments():
    701   parser = argparse.ArgumentParser()
    702   parser.add_argument("tested_file",
    703                       help="text file the checks should be verified against")
    704   parser.add_argument("source_path", nargs="?",
    705                       help="path to file/folder with checking annotations")
    706   parser.add_argument("--check-prefix", dest="check_prefix", default="CHECK", metavar="PREFIX",
    707                       help="prefix of checks in the test files (default: CHECK)")
    708   parser.add_argument("--list-groups", dest="list_groups", action="store_true",
    709                       help="print a list of all groups found in the tested file")
    710   parser.add_argument("--dump-group", dest="dump_group", metavar="GROUP",
    711                       help="print the contents of an output group")
    712   parser.add_argument("-q", "--quiet", action="store_true",
    713                       help="print only errors")
    714   return parser.parse_args()
    715 
    716 
    717 def ListGroups(outputFilename):
    718   outputFile = OutputFile(open(outputFilename, "r"))
    719   for group in outputFile.groups:
    720     Logger.log(group.name)
    721 
    722 
    723 def DumpGroup(outputFilename, groupName):
    724   outputFile = OutputFile(open(outputFilename, "r"))
    725   group = outputFile.findGroup(groupName)
    726   if group:
    727     lineNo = group.lineNo
    728     maxLineNo = lineNo + len(group.body)
    729     lenLineNo = len(str(maxLineNo)) + 2
    730     for line in group.body:
    731       Logger.log((str(lineNo) + ":").ljust(lenLineNo) + line)
    732       lineNo += 1
    733   else:
    734     Logger.fail("Group \"" + groupName + "\" not found in the output")
    735 
    736 
    737 # Returns a list of files to scan for check annotations in the given path. Path
    738 # to a file is returned as a single-element list, directories are recursively
    739 # traversed and all '.java' files returned.
    740 def FindCheckFiles(path):
    741   if not path:
    742     Logger.fail("No source path provided")
    743   elif os.path.isfile(path):
    744     return [ path ]
    745   elif os.path.isdir(path):
    746     foundFiles = []
    747     for root, dirs, files in os.walk(path):
    748       for file in files:
    749         if os.path.splitext(file)[1] == ".java":
    750           foundFiles.append(os.path.join(root, file))
    751     return foundFiles
    752   else:
    753     Logger.fail("Source path \"" + path + "\" not found")
    754 
    755 
    756 def RunChecks(checkPrefix, checkPath, outputFilename):
    757   outputBaseName = os.path.basename(outputFilename)
    758   outputFile = OutputFile(open(outputFilename, "r"), outputBaseName)
    759 
    760   for checkFilename in FindCheckFiles(checkPath):
    761     checkBaseName = os.path.basename(checkFilename)
    762     checkFile = CheckFile(checkPrefix, open(checkFilename, "r"), checkBaseName)
    763     checkFile.match(outputFile)
    764 
    765 
    766 if __name__ == "__main__":
    767   args = ParseArguments()
    768 
    769   if args.quiet:
    770     Logger.Verbosity = Logger.Level.Error
    771 
    772   if args.list_groups:
    773     ListGroups(args.tested_file)
    774   elif args.dump_group:
    775     DumpGroup(args.tested_file, args.dump_group)
    776   else:
    777     RunChecks(args.check_prefix, args.source_path, args.tested_file)
    778