# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Crocodile source scanners."""


import re


def _IsEscaped(text, index):
    """Reports whether the character at index is escaped by backslashes.

    Args:
      text: String being scanned.
      index: Offset into text of the character to test.

    Returns:
      True if an odd number of consecutive backslashes immediately precedes
      index (so the last one escapes the character), False otherwise.
    """
    prefix = text[:index]
    backslashes = len(prefix) - len(prefix.rstrip('\\'))
    return backslashes % 2 == 1


class Scanner(object):
    """Generic source scanner.

    Subclasses configure the scanner through four attributes:
      re_token: Compiled pattern matching every token of interest (comment
          markers and string delimiters).
      comment_to_eol: List of tokens which start a comment running to the end
          of the line.
      comment_start: Token which opens a multi-line comment, or None.
      comment_end: Token which closes a multi-line comment, or None.
    """

    def __init__(self):
        """Constructor."""

        self.re_token = re.compile('#')
        self.comment_to_eol = ['#']
        self.comment_start = None
        self.comment_end = None

    def ScanLines(self, lines):
        """Scans the lines for executable statements.

        Comments are removed from each line; a line is reported if anything
        other than whitespace remains, or if the line began inside a string
        carried over from a previous line.

        Args:
          lines: Iterator returning source lines.

        Returns:
          An array of line numbers which are executable.
        """
        exe_lines = []

        # State carried from one line to the next.
        in_string = None      # Closing token of the open string, if any
        in_comment = None     # Closing token of the open comment, if any
        comment_index = None  # Offset in current line where comment began

        for lineno, line in enumerate(lines, 1):
            in_string_at_start = in_string

            # finditer() keeps scanning the line as originally read, so match
            # offsets stay valid while 'line' is edited below.  The edits
            # either preserve length (blanking a comment) or truncate the
            # tail just before leaving the loop.
            for t in self.re_token.finditer(line):
                # Use the whole match so patterns without a capture group
                # work, and strip it so a token matched with leading
                # whitespace (such as an indented '#') still compares equal
                # to the bare token.
                tokenstr = t.group(0).strip()

                if in_comment:
                    # Inside a multi-line comment, so look for end token
                    if tokenstr == in_comment:
                        in_comment = None
                        # Replace comment with spaces
                        line = (line[:comment_index]
                                + ' ' * (t.end(0) - comment_index)
                                + line[t.end(0):])

                elif in_string:
                    # Inside a string, so look for end token.  A delimiter
                    # preceded by an odd number of backslashes is escaped and
                    # does not end the string.
                    if (tokenstr == in_string
                            and not _IsEscaped(t.string, t.start(0))):
                        in_string = None

                elif tokenstr in self.comment_to_eol:
                    # Single-line comment, so truncate line at start of token
                    line = line[:t.start(0)]
                    break

                elif tokenstr == self.comment_start:
                    # Multi-line comment start - end token is comment_end
                    in_comment = self.comment_end
                    comment_index = t.start(0)

                else:
                    # Starting a string - end token is same as start
                    in_string = tokenstr

            # If still in comment at end of line, remove comment
            if in_comment:
                line = line[:comment_index]
                # Next line, delete from the beginning
                comment_index = 0

            # If line-sans-comments is not empty, claim it may be executable
            if line.strip() or in_string_at_start:
                exe_lines.append(lineno)

        return exe_lines

    def Scan(self, filename):
        """Reads the file and scans its lines.

        Args:
          filename: Path to file to scan.

        Returns:
          An array of line numbers which are executable.
        """

        # TODO: All manner of error checking
        with open(filename, 'rt') as f:
            return self.ScanLines(f)


class PythonScanner(Scanner):
    """Python source scanner."""

    def __init__(self):
        """Constructor."""
        Scanner.__init__(self)

        # The lookbehind rejects a quote preceded by exactly one backslash.
        # Longer runs of backslashes are resolved in Scanner.ScanLines(),
        # which counts them when deciding whether a quote ends a string.
        self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')
        self.comment_to_eol = ['#']
        self.comment_start = None
        self.comment_end = None


class CppScanner(Scanner):
    """C / C++ / ObjC / ObjC++ source scanner."""

    def __init__(self):
        """Constructor."""
        Scanner.__init__(self)

        # The lookbehind rejects a quote preceded by exactly one backslash.
        # Longer runs of backslashes are resolved in Scanner.ScanLines(),
        # which counts them when deciding whether a quote ends a string.
        self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')

        # TODO: Treat '\' at EOL as a token, and handle it as continuing the
        # previous line.  That is, if in a comment-to-eol, this line is a
        # comment too.

        # Note that we treat # at beginning of line as a comment, so that we
        # ignore preprocessor definitions.  The pattern also accepts leading
        # whitespace before the '#'; ScanLines() strips it from the token.
        self.comment_to_eol = ['//', '#']

        self.comment_start = '/*'
        self.comment_end = '*/'


def ScanFile(filename, language):
    """Scans a file for executable lines.

    Args:
      filename: Path to file to scan.
      language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

    Returns:
      A list of executable lines, or an empty list if the file was not a
      handled language.
    """

    if language == 'python':
        return PythonScanner().Scan(filename)
    elif language in ['C', 'C++', 'ObjC', 'ObjC++']:
        return CppScanner().Scan(filename)

    # Something we don't handle
    return []