Home | History | Annotate | Download | only in libscanbuild
      1 # -*- coding: utf-8 -*-
      2 #                     The LLVM Compiler Infrastructure
      3 #
      4 # This file is distributed under the University of Illinois Open Source
      5 # License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """
      7 
      8 import re
      9 import os
     10 import collections
     11 
     12 __all__ = ['split_command', 'classify_source', 'compiler_language']
     13 
     14 # Ignored compiler options map for compilation database creation.
     15 # The map is used in `split_command` method. (Which does ignore and classify
     16 # parameters.) Please note, that these are not the only parameters which
     17 # might be ignored.
     18 #
     19 # Keys are the option name, value number of options to skip
     20 IGNORED_FLAGS = {
     21     # compiling only flag, ignored because the creator of compilation
     22     # database will explicitly set it.
     23     '-c': 0,
     24     # preprocessor macros, ignored because would cause duplicate entries in
     25     # the output (the only difference would be these flags). this is actual
     26     # finding from users, who suffered longer execution time caused by the
     27     # duplicates.
     28     '-MD': 0,
     29     '-MMD': 0,
     30     '-MG': 0,
     31     '-MP': 0,
     32     '-MF': 1,
     33     '-MT': 1,
     34     '-MQ': 1,
     35     # linker options, ignored because for compilation database will contain
     36     # compilation commands only. so, the compiler would ignore these flags
     37     # anyway. the benefit to get rid of them is to make the output more
     38     # readable.
     39     '-static': 0,
     40     '-shared': 0,
     41     '-s': 0,
     42     '-rdynamic': 0,
     43     '-l': 1,
     44     '-L': 1,
     45     '-u': 1,
     46     '-z': 1,
     47     '-T': 1,
     48     '-Xlinker': 1
     49 }
     50 
     51 # Known C/C++ compiler executable name patterns
     52 COMPILER_PATTERNS = frozenset([
     53     re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
     54     re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
     55     re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
     56     re.compile(r'^llvm-g(cc|\+\+)$'),
     57 ])
     58 
     59 
     60 def split_command(command):
     61     """ Returns a value when the command is a compilation, None otherwise.
     62 
     63     The value on success is a named tuple with the following attributes:
     64 
     65         files:    list of source files
     66         flags:    list of compile options
     67         compiler: string value of 'c' or 'c++' """
     68 
     69     # the result of this method
     70     result = collections.namedtuple('Compilation',
     71                                     ['compiler', 'flags', 'files'])
     72     result.compiler = compiler_language(command)
     73     result.flags = []
     74     result.files = []
     75     # quit right now, if the program was not a C/C++ compiler
     76     if not result.compiler:
     77         return None
     78     # iterate on the compile options
     79     args = iter(command[1:])
     80     for arg in args:
     81         # quit when compilation pass is not involved
     82         if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
     83             return None
     84         # ignore some flags
     85         elif arg in IGNORED_FLAGS:
     86             count = IGNORED_FLAGS[arg]
     87             for _ in range(count):
     88                 next(args)
     89         elif re.match(r'^-(l|L|Wl,).+', arg):
     90             pass
     91         # some parameters could look like filename, take as compile option
     92         elif arg in {'-D', '-I'}:
     93             result.flags.extend([arg, next(args)])
     94         # parameter which looks source file is taken...
     95         elif re.match(r'^[^-].+', arg) and classify_source(arg):
     96             result.files.append(arg)
     97         # and consider everything else as compile option.
     98         else:
     99             result.flags.append(arg)
    100     # do extra check on number of source files
    101     return result if result.files else None
    102 
    103 
    104 def classify_source(filename, c_compiler=True):
    105     """ Return the language from file name extension. """
    106 
    107     mapping = {
    108         '.c': 'c' if c_compiler else 'c++',
    109         '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
    110         '.ii': 'c++-cpp-output',
    111         '.m': 'objective-c',
    112         '.mi': 'objective-c-cpp-output',
    113         '.mm': 'objective-c++',
    114         '.mii': 'objective-c++-cpp-output',
    115         '.C': 'c++',
    116         '.cc': 'c++',
    117         '.CC': 'c++',
    118         '.cp': 'c++',
    119         '.cpp': 'c++',
    120         '.cxx': 'c++',
    121         '.c++': 'c++',
    122         '.C++': 'c++',
    123         '.txx': 'c++'
    124     }
    125 
    126     __, extension = os.path.splitext(os.path.basename(filename))
    127     return mapping.get(extension)
    128 
    129 
    130 def compiler_language(command):
    131     """ A predicate to decide the command is a compiler call or not.
    132 
    133     Returns 'c' or 'c++' when it match. None otherwise. """
    134 
    135     cplusplus = re.compile(r'^(.+)(\+\+)(-.+|)$')
    136 
    137     if command:
    138         executable = os.path.basename(command[0])
    139         if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
    140             return 'c++' if cplusplus.match(executable) else 'c'
    141     return None
    142