Home | History | Annotate | Download | only in code_coverage
      1 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 # Use of this source code is governed by a BSD-style license that can be
      3 # found in the LICENSE file.
      4 
      5 """Crocodile source scanners."""
      6 
      7 
      8 import re
      9 
     10 
     11 class Scanner(object):
     12   """Generic source scanner."""
     13 
     14   def __init__(self):
     15     """Constructor."""
     16 
     17     self.re_token = re.compile('#')
     18     self.comment_to_eol = ['#']
     19     self.comment_start = None
     20     self.comment_end = None
     21 
     22   def ScanLines(self, lines):
     23     """Scans the lines for executable statements.
     24 
     25     Args:
     26       lines: Iterator returning source lines.
     27 
     28     Returns:
     29       An array of line numbers which are executable.
     30     """
     31     exe_lines = []
     32     lineno = 0
     33 
     34     in_string = None
     35     in_comment = None
     36     comment_index = None
     37 
     38     for line in lines:
     39       lineno += 1
     40       in_string_at_start = in_string
     41 
     42       for t in self.re_token.finditer(line):
     43         tokenstr = t.groups()[0]
     44 
     45         if in_comment:
     46           # Inside a multi-line comment, so look for end token
     47           if tokenstr == in_comment:
     48             in_comment = None
     49             # Replace comment with spaces
     50             line = (line[:comment_index]
     51                     + ' ' * (t.end(0) - comment_index)
     52                     + line[t.end(0):])
     53 
     54         elif in_string:
     55           # Inside a string, so look for end token
     56           if tokenstr == in_string:
     57             in_string = None
     58 
     59         elif tokenstr in self.comment_to_eol:
     60           # Single-line comment, so truncate line at start of token
     61           line = line[:t.start(0)]
     62           break
     63 
     64         elif tokenstr == self.comment_start:
     65           # Multi-line comment start - end token is comment_end
     66           in_comment = self.comment_end
     67           comment_index = t.start(0)
     68 
     69         else:
     70           # Starting a string - end token is same as start
     71           in_string = tokenstr
     72 
     73       # If still in comment at end of line, remove comment
     74       if in_comment:
     75         line = line[:comment_index]
     76         # Next line, delete from the beginnine
     77         comment_index = 0
     78 
     79       # If line-sans-comments is not empty, claim it may be executable
     80       if line.strip() or in_string_at_start:
     81         exe_lines.append(lineno)
     82 
     83     # Return executable lines
     84     return exe_lines
     85 
     86   def Scan(self, filename):
     87     """Reads the file and scans its lines.
     88 
     89     Args:
     90       filename: Path to file to scan.
     91 
     92     Returns:
     93       An array of line numbers which are executable.
     94     """
     95 
     96     # TODO: All manner of error checking
     97     f = None
     98     try:
     99       f = open(filename, 'rt')
    100       return self.ScanLines(f)
    101     finally:
    102       if f:
    103         f.close()
    104 
    105 
    106 class PythonScanner(Scanner):
    107   """Python source scanner."""
    108 
    109   def __init__(self):
    110     """Constructor."""
    111     Scanner.__init__(self)
    112 
    113     # TODO: This breaks for strings ending in more than 2 backslashes.  Need
    114     # a pattern which counts only an odd number of backslashes, so the last
    115     # one thus escapes the quote.
    116     self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')
    117     self.comment_to_eol = ['#']
    118     self.comment_start = None
    119     self.comment_end = None
    120 
    121 
    122 class CppScanner(Scanner):
    123   """C / C++ / ObjC / ObjC++ source scanner."""
    124 
    125   def __init__(self):
    126     """Constructor."""
    127     Scanner.__init__(self)
    128 
    129     # TODO: This breaks for strings ending in more than 2 backslashes.  Need
    130     # a pattern which counts only an odd number of backslashes, so the last
    131     # one thus escapes the quote.
    132     self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')
    133 
    134     # TODO: Treat '\' at EOL as a token, and handle it as continuing the
    135     # previous line.  That is, if in a comment-to-eol, this line is a comment
    136     # too.
    137 
    138     # Note that we treat # at beginning of line as a comment, so that we ignore
    139     # preprocessor definitions
    140     self.comment_to_eol = ['//', '#']
    141 
    142     self.comment_start = '/*'
    143     self.comment_end = '*/'
    144 
    145 
    146 def ScanFile(filename, language):
    147   """Scans a file for executable lines.
    148 
    149   Args:
    150     filename: Path to file to scan.
    151     language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')
    152 
    153   Returns:
    154     A list of executable lines, or an empty list if the file was not a handled
    155         language.
    156   """
    157 
    158   if language == 'python':
    159     return PythonScanner().Scan(filename)
    160   elif language in ['C', 'C++', 'ObjC', 'ObjC++']:
    161     return CppScanner().Scan(filename)
    162 
    163   # Something we don't handle
    164   return []
    165