# Home | History | Annotate | Download | only in utils
      1 #!/usr/bin/env python2.7
      2 
      3 """A test case update script.
      4 
      5 This script is a utility to update LLVM 'llvm-mca' based test cases with new
      6 FileCheck patterns.
      7 """
      8 
      9 import argparse
     10 from collections import defaultdict
     11 import glob
     12 import os
     13 import sys
     14 import warnings
     15 
     16 from UpdateTestChecks import common
     17 
     18 
# llvm-mca tests use '#' comments, so all generated CHECK lines are emitted
# as '# PREFIX...' lines.
COMMENT_CHAR = '#'
# Header inserted at the top of every updated test.  _write_output() drops any
# pre-existing line starting with ADVERT_PREFIX before regenerating, so
# re-running this script does not duplicate the banner.
ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
    COMMENT_CHAR)
ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
     23 
     24 
     25 class Error(Exception):
     26   """ Generic Error that can be raised without printing a traceback.
     27   """
     28   pass
     29 
     30 
     31 def _warn(msg):
     32   """ Log a user warning to stderr.
     33   """
     34   warnings.warn(msg, Warning, stacklevel=2)
     35 
     36 
     37 def _configure_warnings(args):
     38   warnings.resetwarnings()
     39   if args.w:
     40     warnings.simplefilter('ignore')
     41   if args.Werror:
     42     warnings.simplefilter('error')
     43 
     44 
     45 def _showwarning(message, category, filename, lineno, file=None, line=None):
     46   """ Version of warnings.showwarning that won't attempt to print out the
     47       line at the location of the warning if the line text is not explicitly
     48       specified.
     49   """
     50   if file is None:
     51     file = sys.stderr
     52   if line is None:
     53     line = ''
     54   file.write(warnings.formatwarning(message, category, filename, lineno, line))
     55 
     56 
     57 def _parse_args():
     58   parser = argparse.ArgumentParser(description=__doc__)
     59   parser.add_argument('-v', '--verbose',
     60                       action='store_true',
     61                       help='show verbose output')
     62   parser.add_argument('-w',
     63                       action='store_true',
     64                       help='suppress warnings')
     65   parser.add_argument('-Werror',
     66                       action='store_true',
     67                       help='promote warnings to errors')
     68   parser.add_argument('--llvm-mca-binary',
     69                       metavar='<path>',
     70                       default='llvm-mca',
     71                       help='the binary to use to generate the test case '
     72                            '(default: llvm-mca)')
     73   parser.add_argument('tests',
     74                       metavar='<test-path>',
     75                       nargs='+')
     76   args = parser.parse_args()
     77 
     78   _configure_warnings(args)
     79 
     80   if not args.llvm_mca_binary:
     81     raise Error('--llvm-mca-binary value cannot be empty string')
     82 
     83   if os.path.basename(args.llvm_mca_binary) != 'llvm-mca':
     84     _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))
     85 
     86   return args
     87 
     88 
     89 def _find_run_lines(input_lines, args):
     90   raw_lines = [m.group(1)
     91                for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
     92                if m]
     93   run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
     94   for l in raw_lines[1:]:
     95     if run_lines[-1].endswith(r'\\'):
     96       run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
     97     else:
     98       run_lines.append(l)
     99 
    100   if args.verbose:
    101     sys.stderr.write('Found {} RUN line{}:\n'.format(
    102         len(run_lines), '' if len(run_lines) == 1 else 's'))
    103     for line in run_lines:
    104       sys.stderr.write('  RUN: {}\n'.format(line))
    105 
    106   return run_lines
    107 
    108 
    109 def _get_run_infos(run_lines, args):
    110   run_infos = []
    111   for run_line in run_lines:
    112     try:
    113       (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
    114                                         for cmd in run_line.split('|', 1)])
    115     except ValueError:
    116       _warn('could not split tool and filecheck commands: {}'.format(run_line))
    117       continue
    118 
    119     tool_basename = os.path.basename(args.llvm_mca_binary)
    120 
    121     if not tool_cmd.startswith(tool_basename + ' '):
    122       _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
    123       continue
    124 
    125     if not filecheck_cmd.startswith('FileCheck '):
    126       _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
    127       continue
    128 
    129     tool_cmd_args = tool_cmd[len(tool_basename):].strip()
    130     tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
    131 
    132     check_prefixes = [item
    133                       for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
    134                       for item in m.group(1).split(',')]
    135     if not check_prefixes:
    136       check_prefixes = ['CHECK']
    137 
    138     run_infos.append((check_prefixes, tool_cmd_args))
    139 
    140   return run_infos
    141 
    142 
    143 def _break_down_block(block_info, common_prefix):
    144   """ Given a block_info, see if we can analyze it further to let us break it
    145       down by prefix per-line rather than per-block.
    146   """
    147   texts = block_info.keys()
    148   prefixes = list(block_info.values())
    149   # Split the lines from each of the incoming block_texts and zip them so that
    150   # each element contains the corresponding lines from each text.  E.g.
    151   #
    152   # block_text_1: A   # line 1
    153   #               B   # line 2
    154   #
    155   # block_text_2: A   # line 1
    156   #               C   # line 2
    157   #
    158   # would become:
    159   #
    160   # [(A, A),   # line 1
    161   #  (B, C)]   # line 2
    162   #
    163   line_tuples = list(zip(*list((text.splitlines() for text in texts))))
    164 
    165   # To simplify output, we'll only proceed if the very first line of the block
    166   # texts is common to each of them.
    167   if len(set(line_tuples[0])) != 1:
    168     return []
    169 
    170   result = []
    171   lresult = defaultdict(list)
    172   for i, line in enumerate(line_tuples):
    173     if len(set(line)) == 1:
    174       # We're about to output a line with the common prefix.  This is a sync
    175       # point so flush any batched-up lines one prefix at a time to the output
    176       # first.
    177       for prefix in sorted(lresult):
    178         result.extend(lresult[prefix])
    179       lresult = defaultdict(list)
    180 
    181       # The line is common to each block so output with the common prefix.
    182       result.append((common_prefix, line[0]))
    183     else:
    184       # The line is not common to each block, or we don't have a common prefix.
    185       # If there are no prefixes available, warn and bail out.
    186       if not prefixes[0]:
    187         _warn('multiple lines not disambiguated by prefixes:\n{}\n'
    188               'Some blocks may be skipped entirely as a result.'.format(
    189                   '\n'.join('  - {}'.format(l) for l in line)))
    190         return []
    191 
    192       # Iterate through the line from each of the blocks and add the line with
    193       # the corresponding prefix to the current batch of results so that we can
    194       # later output them per-prefix.
    195       for i, l in enumerate(line):
    196         for prefix in prefixes[i]:
    197           lresult[prefix].append((prefix, l))
    198 
    199   # Flush any remaining batched-up lines one prefix at a time to the output.
    200   for prefix in sorted(lresult):
    201     result.extend(lresult[prefix])
    202   return result
    203 
    204 
    205 def _get_useful_prefix_info(run_infos):
    206   """ Given the run_infos, calculate any prefixes that are common to every one,
    207       and the length of the longest prefix string.
    208   """
    209   try:
    210     all_sets = [set(s) for s in list(zip(*run_infos))[0]]
    211     common_to_all = set.intersection(*all_sets)
    212     longest_prefix_len = max(len(p) for p in set.union(*all_sets))
    213   except IndexError:
    214     common_to_all = []
    215     longest_prefix_len = 0
    216   else:
    217     if len(common_to_all) > 1:
    218       _warn('Multiple prefixes common to all RUN lines: {}'.format(
    219           common_to_all))
    220     if common_to_all:
    221       common_to_all = sorted(common_to_all)[0]
    222   return common_to_all, longest_prefix_len
    223 
    224 
def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
  """ For each run line, run the tool with the specified args and collect the
      output. We use the concept of 'blocks' for uniquing, where a block is
      a series of lines of text with no more than one newline character between
      each one.  For example:

      This
      is
      one
      block

      This is
      another block

      This is yet another block

      We then build up a 'block_infos' structure containing a dict where the
      text of each block is the key and a list of the sets of prefixes that may
      generate that particular block.  This then goes through a series of
      transformations to minimise the amount of CHECK lines that need to be
      written by taking advantage of common prefixes.

      Returns:
        A defaultdict mapping block number -> (dict of block text -> sorted
        list of prefixes), except that entries successfully broken down by
        _break_down_block() are replaced by its list of (prefix, line) pairs.
  """

  def _block_key(tool_args, prefixes):
    """ Get a hashable key based on the current tool_args and prefixes.
    """
    return ' '.join([tool_args] + prefixes)

  # Map of _block_key -> list of block texts produced by that RUN line.
  all_blocks = {}
  max_block_len = 0

  # Run the tool for each run line to generate all of the blocks.
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
                                         tool_args,
                                         test_path)

    # Replace any lines consisting of purely whitespace with empty lines.
    # This makes the '\n\n' block-split below reliable.
    raw_tool_output = '\n'.join(line if line.strip() else ''
                                for line in raw_tool_output.splitlines())

    # Split blocks, stripping all trailing whitespace, but keeping preceding
    # whitespace except for newlines so that columns will line up visually.
    all_blocks[key] = [b.lstrip('\n').rstrip()
                       for b in raw_tool_output.split('\n\n')]
    max_block_len = max(max_block_len, len(all_blocks[key]))

  # If necessary, pad the lists of blocks with empty blocks so that they are
  # all the same length, which lets them be compared position-by-position.
  for key in all_blocks:
    len_to_pad = max_block_len - len(all_blocks[key])
    all_blocks[key] += [''] * len_to_pad

  # Create the block_infos structure where it is a nested dict in the form of:
  # block number -> block text -> list of prefix sets
  block_infos = defaultdict(lambda: defaultdict(list))
  for prefixes, tool_args in run_infos:
    key = _block_key(tool_args, prefixes)
    for block_num, block_text in enumerate(all_blocks[key]):
      block_infos[block_num][block_text].append(set(prefixes))

  # Now go through the block_infos structure and attempt to smartly prune the
  # number of prefixes per block to the minimal set possible to output.
  for block_num in range(len(block_infos)):
    # When there are multiple block texts for a block num, remove any
    # prefixes that are common to more than one of them.
    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
    all_sets = [s for s in block_infos[block_num].values()]
    pruned_sets = []

    for i, setlist in enumerate(all_sets):
      # Union of every prefix that appears for some OTHER block text.
      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
                              for set_ in setlist2 for elem in set_
                              if i != j])
      pruned_sets.append([s - other_set_values for s in setlist])

    # NOTE: pruned_sets is index-aligned with block_infos[block_num]'s
    # iteration order; assigning to existing keys below is safe because no
    # keys are added or removed while iterating.
    for i, block_text in enumerate(block_infos[block_num]):

      # When a block text matches multiple sets of prefixes, try removing any
      # prefixes that aren't common to all of them.
      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
      common_values = set.intersection(*pruned_sets[i])
      if common_values:
        pruned_sets[i] = [common_values]

      # Everything should be uniqued as much as possible by now.  Apply the
      # newly pruned sets to the block_infos structure.
      # If there are any blocks of text that still match multiple prefixes,
      # output a warning.
      current_set = set()
      for s in pruned_sets[i]:
        # Keep only the alphabetically-first prefix of each ambiguous set.
        s = sorted(list(s))
        if s:
          current_set.add(s[0])
          if len(s) > 1:
            _warn('Multiple prefixes generating same output: {} '
                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))

      block_infos[block_num][block_text] = sorted(list(current_set))

    # If we have multiple block_texts, try to break them down further to avoid
    # the case where we have very similar block_texts repeated after each
    # other.
    if common_prefix and len(block_infos[block_num]) > 1:
      # We'll only attempt this if each of the block_texts have the same number
      # of lines as each other.
      same_num_Lines = (len(set(len(k.splitlines())
                                for k in block_infos[block_num].keys())) == 1)
      if same_num_Lines:
        breakdown = _break_down_block(block_infos[block_num], common_prefix)
        if breakdown:
          # Replace the dict entry with the per-line (prefix, line) list;
          # _write_output() distinguishes the two shapes via type() checks.
          block_infos[block_num] = breakdown

  return block_infos
    340 
    341 
    342 def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
    343   """ Write an individual block, with correct padding on the prefixes.
    344   """
    345   end_prefix = ':     '
    346   previous_prefix = None
    347   num_lines_of_prefix = 0
    348 
    349   for prefix, line in block:
    350     if prefix in not_prefix_set:
    351       _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
    352             'in input file.'.format(prefix))
    353       continue
    354 
    355     # If the previous line isn't already blank and we're writing more than one
    356     # line for the current prefix output a blank line first, unless either the
    357     # current of previous prefix is common to all.
    358     num_lines_of_prefix += 1
    359     if prefix != previous_prefix:
    360       if output and output[-1]:
    361         if num_lines_of_prefix > 1 or any(p == common_prefix
    362                                           for p in (prefix, previous_prefix)):
    363           output.append('')
    364       num_lines_of_prefix = 0
    365       previous_prefix = prefix
    366 
    367     output.append(
    368         '{} {}{}{} {}'.format(COMMENT_CHAR,
    369                               prefix,
    370                               end_prefix,
    371                               ' ' * (prefix_pad - len(prefix)),
    372                               line).rstrip())
    373     end_prefix = '-NEXT:'
    374 
    375   output.append('')
    376 
    377 
    378 def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
    379                   args, common_prefix, prefix_pad):
    380   prefix_set = set([prefix for prefixes, _ in prefix_list
    381                     for prefix in prefixes])
    382   not_prefix_set = set()
    383 
    384   output_lines = []
    385   for input_line in input_lines:
    386     if input_line.startswith(ADVERT_PREFIX):
    387       continue
    388 
    389     if input_line.startswith(COMMENT_CHAR):
    390       m = common.CHECK_RE.match(input_line)
    391       try:
    392         prefix = m.group(1)
    393       except AttributeError:
    394         prefix = None
    395 
    396       if '{}-NOT:'.format(prefix) in input_line:
    397         not_prefix_set.add(prefix)
    398 
    399       if prefix not in prefix_set or prefix in not_prefix_set:
    400         output_lines.append(input_line)
    401         continue
    402 
    403     if common.should_add_line_to_output(input_line, prefix_set):
    404       # This input line of the function body will go as-is into the output.
    405       # Except make leading whitespace uniform: 2 spaces.
    406       input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
    407 
    408       # Skip empty lines if the previous output line is also empty.
    409       if input_line or output_lines[-1]:
    410         output_lines.append(input_line)
    411     else:
    412       continue
    413 
    414   # Add a blank line before the new checks if required.
    415   if len(output_lines) > 0 and output_lines[-1]:
    416     output_lines.append('')
    417 
    418   output_check_lines = []
    419   for block_num in range(len(block_infos)):
    420     for block_text in sorted(block_infos[block_num]):
    421       if not block_text:
    422         continue
    423 
    424       if type(block_infos[block_num]) is list:
    425         # The block is of the type output from _break_down_block().
    426         _write_block(output_check_lines,
    427                      block_infos[block_num],
    428                      not_prefix_set,
    429                      common_prefix,
    430                      prefix_pad)
    431         break
    432       elif block_infos[block_num][block_text]:
    433         # _break_down_block() was unable to do do anything so output the block
    434         # as-is.
    435         lines = block_text.split('\n')
    436         for prefix in block_infos[block_num][block_text]:
    437           _write_block(output_check_lines,
    438                        [(prefix, line) for line in lines],
    439                        not_prefix_set,
    440                        common_prefix,
    441                        prefix_pad)
    442 
    443   if output_check_lines:
    444     output_lines.insert(0, ADVERT)
    445     output_lines.extend(output_check_lines)
    446 
    447   # The file should not end with two newlines. It creates unnecessary churn.
    448   while len(output_lines) > 0 and output_lines[-1] == '':
    449     output_lines.pop()
    450 
    451   if input_lines == output_lines:
    452     sys.stderr.write('            [unchanged]\n')
    453     return
    454   sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))
    455 
    456   if args.verbose:
    457     sys.stderr.write(
    458         'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))
    459 
    460   with open(test_path, 'wb') as f:
    461     f.writelines(['{}\n'.format(l).encode() for l in output_lines])
    462 
    463 def main():
    464   args = _parse_args()
    465   test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
    466   for test_path in test_paths:
    467     sys.stderr.write('Test: {}\n'.format(test_path))
    468 
    469     # Call this per test. By default each warning will only be written once
    470     # per source location. Reset the warning filter so that now each warning
    471     # will be written once per source location per test.
    472     _configure_warnings(args)
    473 
    474     if args.verbose:
    475       sys.stderr.write(
    476           'Scanning for RUN lines in test file: {}\n'.format(test_path))
    477 
    478     if not os.path.isfile(test_path):
    479       raise Error('could not find test file: {}'.format(test_path))
    480 
    481     with open(test_path) as f:
    482       input_lines = [l.rstrip() for l in f]
    483 
    484     run_lines = _find_run_lines(input_lines, args)
    485     run_infos = _get_run_infos(run_lines, args)
    486     common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
    487     block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
    488     _write_output(test_path,
    489                   input_lines,
    490                   run_infos,
    491                   block_infos,
    492                   args,
    493                   common_prefix,
    494                   prefix_pad)
    495 
    496   return 0
    497 
    498 
    499 if __name__ == '__main__':
    500   try:
    501     warnings.showwarning = _showwarning
    502     sys.exit(main())
    503   except Error as e:
    504     sys.stdout.write('error: {}\n'.format(e))
    505     sys.exit(1)
    506