# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only
# parameters which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    '-c': 0,
    # dependency generation flags (handled by the preprocessor), ignored
    # because they would cause duplicate entries in the output (the only
    # difference would be these flags). this is an actual finding from
    # users, who suffered longer execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# Executable names containing '++' (optionally followed by a '-suffix')
# are taken as C++ compilers.
_CPLUSPLUS_PATTERN = re.compile(r'^(.+)(\+\+)(-.+|)$')

# Linker related options written together with their value (eg.: '-lfoo').
_JOINED_LINKER_FLAG = re.compile(r'^-(l|L|Wl,).+')

# Arguments which do not start with a dash might be file names.
_FILE_CANDIDATE = re.compile(r'^[^-].+')

# Options which mean that no real compilation pass is requested.
_NON_COMPILING_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Options whose separately written value could look like a file name.
_FLAGS_WITH_VALUE = frozenset(['-D', '-I'])

# The result type of `split_command`.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the full command line as a list of strings, the first element
             is the executable.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not run a compilation pass (eg.: preprocessing
    only), or when no source file was found among the arguments. """

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some options consume the arguments which follow them.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILING_FLAGS:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # a truncated command line (value is missing at the end) shall
            # not raise StopIteration, therefore the default value.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif _JOINED_LINKER_FLAG.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in _FLAGS_WITH_VALUE:
            flags.append(arg)
            value = next(args, None)
            # keep the option alone if the command line ends without value
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif _FILE_CANDIDATE.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=language, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   name (or path) of the file to classify.
    c_compiler: when False, the '.c' and '.i' extensions are mapped to the
                C++ variants instead of the C ones.

    Returns the language name as string, or None when the extension is not
    a known source file extension. The lookup is case sensitive. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command: the full command line as a list of strings, only the first
             element (the executable) is checked.

    Returns 'c' or 'c++' when it match. None otherwise (also for an empty
    command). """

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return 'c++' if _CPLUSPLUS_PATTERN.match(executable) else 'c'
    return None