Home | History | Annotate | Download | only in checkers
      1 # -*- coding: utf-8 -*-
      2 #
      3 # Copyright (C) 2009, 2010, 2012 Google Inc. All rights reserved.
      4 # Copyright (C) 2009 Torch Mobile Inc.
      5 # Copyright (C) 2009 Apple Inc. All rights reserved.
      6 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
      7 #
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are
     10 # met:
     11 #
     12 #    * Redistributions of source code must retain the above copyright
     13 # notice, this list of conditions and the following disclaimer.
     14 #    * Redistributions in binary form must reproduce the above
     15 # copyright notice, this list of conditions and the following disclaimer
     16 # in the documentation and/or other materials provided with the
     17 # distribution.
     18 #    * Neither the name of Google Inc. nor the names of its
     19 # contributors may be used to endorse or promote products derived from
     20 # this software without specific prior written permission.
     21 #
     22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     23 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     25 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     26 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     27 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     28 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     32 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33 
     34 # This is the modified version of Google's cpplint. The original code is
     35 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
     36 
     37 """Support for check-webkit-style."""
     38 
     39 import math  # for log
     40 import os
     41 import os.path
     42 import re
     43 import sre_compile
     44 import string
     45 import sys
     46 import unicodedata
     47 
     48 from webkitpy.common.memoized import memoized
     49 from webkitpy.common.system.filesystem import FileSystem
     50 
     51 # Headers that we consider STL headers.
     52 _STL_HEADERS = frozenset([
     53     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
     54     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
     55     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
     56     'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
     57     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
     58     'utility', 'vector', 'vector.h',
     59     ])
     60 
     61 
     62 # Non-STL C++ system headers.
     63 _CPP_HEADERS = frozenset([
     64     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
     65     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
     66     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
     67     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
     68     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
     69     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
     70     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
     71     'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
     72     'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
     73     'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
     74     'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
     75     'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
     76     'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
     77     ])
     78 
     79 
     80 # Assertion macros.  These are defined in base/logging.h and
     81 # testing/base/gunit.h.  Note that the _M versions need to come first
     82 # for substring matching to work.
     83 _CHECK_MACROS = [
     84     'DCHECK', 'CHECK',
     85     'EXPECT_TRUE_M', 'EXPECT_TRUE',
     86     'ASSERT_TRUE_M', 'ASSERT_TRUE',
     87     'EXPECT_FALSE_M', 'EXPECT_FALSE',
     88     'ASSERT_FALSE_M', 'ASSERT_FALSE',
     89     ]
     90 
     91 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
     92 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
     93 
     94 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
     95                         ('>=', 'GE'), ('>', 'GT'),
     96                         ('<=', 'LE'), ('<', 'LT')]:
     97     _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
     98     _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
     99     _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    100     _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    101     _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    102     _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    103 
    104 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    105                             ('>=', 'LT'), ('>', 'LE'),
    106                             ('<=', 'GT'), ('<', 'GE')]:
    107     _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    108     _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    109     _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    110     _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    111 
    112 
    113 # These constants define types of headers for use with
    114 # _IncludeState.check_next_include_order().
    115 _CONFIG_HEADER = 0
    116 _PRIMARY_HEADER = 1
    117 _OTHER_HEADER = 2
    118 _MOC_HEADER = 3
    119 
    120 
    121 # The regexp compilation caching is inlined in all regexp functions for
    122 # performance reasons; factoring it out into a separate function turns out
    123 # to be noticeably expensive.
    124 _regexp_compile_cache = {}
    125 
    126 
    127 def match(pattern, s):
    128     """Matches the string with the pattern, caching the compiled regexp."""
    129     if not pattern in _regexp_compile_cache:
    130         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    131     return _regexp_compile_cache[pattern].match(s)
    132 
    133 
    134 def search(pattern, s):
    135     """Searches the string for the pattern, caching the compiled regexp."""
    136     if not pattern in _regexp_compile_cache:
    137         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    138     return _regexp_compile_cache[pattern].search(s)
    139 
    140 
    141 def sub(pattern, replacement, s):
    142     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    143     if not pattern in _regexp_compile_cache:
    144         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    145     return _regexp_compile_cache[pattern].sub(replacement, s)
    146 
    147 
    148 def subn(pattern, replacement, s):
    149     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    150     if not pattern in _regexp_compile_cache:
    151         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    152     return _regexp_compile_cache[pattern].subn(replacement, s)
    153 
    154 
    155 def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    156     """Returns the string with replacement done.
    157 
    158     Every character in the match is replaced with char.
    159     Due to the iterative nature, pattern should not match char or
    160     there will be an infinite loop.
    161 
    162     Example:
    163       pattern = r'<[^>]>' # template parameters
    164       char_replacement =  '_'
    165       s =     'A<B<C, D>>'
    166       Returns 'A_________'
    167 
    168     Args:
    169       pattern: The regex to match.
    170       char_replacement: The character to put in place of every
    171                         character of the match.
    172       s: The string on which to do the replacements.
    173 
    174     Returns:
    175       True, if the given line is blank.
    176     """
    177     while True:
    178         matched = search(pattern, s)
    179         if not matched:
    180             return s
    181         start_match_index = matched.start(0)
    182         end_match_index = matched.end(0)
    183         match_length = end_match_index - start_match_index
    184         s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:]
    185 
    186 
    187 def _find_in_lines(regex, lines, start_position, not_found_position):
    188     """Does a find starting at start position and going forward until
    189     a match is found.
    190 
    191     Returns the position where the regex started.
    192     """
    193     current_row = start_position.row
    194 
    195     # Start with the given row and trim off everything before what should be matched.
    196     current_line = lines[start_position.row][start_position.column:]
    197     starting_offset = start_position.column
    198     while True:
    199         found_match = search(regex, current_line)
    200         if found_match:
    201             return Position(current_row, starting_offset + found_match.start())
    202 
    203         # A match was not found so continue forward.
    204         current_row += 1
    205         starting_offset = 0
    206         if current_row >= len(lines):
    207             return not_found_position
    208         current_line = lines[current_row]
    209 
    210 def _rfind_in_lines(regex, lines, start_position, not_found_position):
    211     """Does a reverse find starting at start position and going backwards until
    212     a match is found.
    213 
    214     Returns the position where the regex ended.
    215     """
    216     # Put the regex in a group and proceed it with a greedy expression that
    217     # matches anything to ensure that we get the last possible match in a line.
    218     last_in_line_regex = r'.*(' + regex + ')'
    219     current_row = start_position.row
    220 
    221     # Start with the given row and trim off everything past what may be matched.
    222     current_line = lines[start_position.row][:start_position.column]
    223     while True:
    224         found_match = match(last_in_line_regex, current_line)
    225         if found_match:
    226             return Position(current_row, found_match.end(1))
    227 
    228         # A match was not found so continue backward.
    229         current_row -= 1
    230         if current_row < 0:
    231             return not_found_position
    232         current_line = lines[current_row]
    233 
    234 
    235 def _convert_to_lower_with_underscores(text):
    236     """Converts all text strings in camelCase or PascalCase to lowers with underscores."""
    237 
    238     # First add underscores before any capital letter followed by a lower case letter
    239     # as long as it is in a word.
    240     # (This put an underscore before Password but not P and A in WPAPassword).
    241     text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text)
    242 
    243     # Next add underscores before capitals at the end of words if it was
    244     # preceeded by lower case letter or number.
    245     # (This puts an underscore before A in isA but not A in CBA).
    246     text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text)
    247 
    248     # Next add underscores when you have a captial letter which is followed by a capital letter
    249     # but is not proceeded by one. (This puts an underscore before A in 'WordADay').
    250     text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text)
    251 
    252     return text.lower()
    253 
    254 
    255 
    256 def _create_acronym(text):
    257     """Creates an acronym for the given text."""
    258     # Removes all lower case letters except those starting words.
    259     text = sub(r'(?<!\b)[a-z]', '', text)
    260     return text.upper()
    261 
    262 
    263 def up_to_unmatched_closing_paren(s):
    264     """Splits a string into two parts up to first unmatched ')'.
    265 
    266     Args:
    267       s: a string which is a substring of line after '('
    268       (e.g., "a == (b + c))").
    269 
    270     Returns:
    271       A pair of strings (prefix before first unmatched ')',
    272       remainder of s after first unmatched ')'), e.g.,
    273       up_to_unmatched_closing_paren("a == (b + c)) { ")
    274       returns "a == (b + c)", " {".
    275       Returns None, None if there is no unmatched ')'
    276 
    277     """
    278     i = 1
    279     for pos, c in enumerate(s):
    280       if c == '(':
    281         i += 1
    282       elif c == ')':
    283         i -= 1
    284         if i == 0:
    285           return s[:pos], s[pos + 1:]
    286     return None, None
    287 
    288 class _IncludeState(dict):
    289     """Tracks line numbers for includes, and the order in which includes appear.
    290 
    291     As a dict, an _IncludeState object serves as a mapping between include
    292     filename and line number on which that file was included.
    293 
    294     Call check_next_include_order() once for each header in the file, passing
    295     in the type constants defined above. Calls in an illegal order will
    296     raise an _IncludeError with an appropriate error message.
    297 
    298     """
    299     # self._section will move monotonically through this set. If it ever
    300     # needs to move backwards, check_next_include_order will raise an error.
    301     _INITIAL_SECTION = 0
    302     _CONFIG_SECTION = 1
    303     _PRIMARY_SECTION = 2
    304     _OTHER_SECTION = 3
    305 
    306     _TYPE_NAMES = {
    307         _CONFIG_HEADER: 'WebCore config.h',
    308         _PRIMARY_HEADER: 'header this file implements',
    309         _OTHER_HEADER: 'other header',
    310         _MOC_HEADER: 'moc file',
    311         }
    312     _SECTION_NAMES = {
    313         _INITIAL_SECTION: "... nothing.",
    314         _CONFIG_SECTION: "WebCore config.h.",
    315         _PRIMARY_SECTION: 'a header this file implements.',
    316         _OTHER_SECTION: 'other header.',
    317         }
    318 
    319     def __init__(self):
    320         dict.__init__(self)
    321         self._section = self._INITIAL_SECTION
    322         self._visited_primary_section = False
    323         self.header_types = dict();
    324 
    325     def visited_primary_section(self):
    326         return self._visited_primary_section
    327 
    328     def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
    329         """Returns a non-empty error message if the next header is out of order.
    330 
    331         This function also updates the internal state to be ready to check
    332         the next include.
    333 
    334         Args:
    335           header_type: One of the _XXX_HEADER constants defined above.
    336           file_is_header: Whether the file that owns this _IncludeState is itself a header
    337 
    338         Returns:
    339           The empty string if the header is in the right order, or an
    340           error message describing what's wrong.
    341 
    342         """
    343         if header_type == _CONFIG_HEADER and file_is_header:
    344             return 'Header file should not contain WebCore config.h.'
    345         if header_type == _PRIMARY_HEADER and file_is_header:
    346             return 'Header file should not contain itself.'
    347         if header_type == _MOC_HEADER:
    348             return ''
    349 
    350         error_message = ''
    351         if self._section != self._OTHER_SECTION:
    352             before_error_message = ('Found %s before %s' %
    353                                     (self._TYPE_NAMES[header_type],
    354                                      self._SECTION_NAMES[self._section + 1]))
    355         after_error_message = ('Found %s after %s' %
    356                                 (self._TYPE_NAMES[header_type],
    357                                  self._SECTION_NAMES[self._section]))
    358 
    359         if header_type == _CONFIG_HEADER:
    360             if self._section >= self._CONFIG_SECTION:
    361                 error_message = after_error_message
    362             self._section = self._CONFIG_SECTION
    363         elif header_type == _PRIMARY_HEADER:
    364             if self._section >= self._PRIMARY_SECTION:
    365                 error_message = after_error_message
    366             elif self._section < self._CONFIG_SECTION:
    367                 error_message = before_error_message
    368             self._section = self._PRIMARY_SECTION
    369             self._visited_primary_section = True
    370         else:
    371             assert header_type == _OTHER_HEADER
    372             if not file_is_header and self._section < self._PRIMARY_SECTION:
    373                 if primary_header_exists:
    374                     error_message = before_error_message
    375             self._section = self._OTHER_SECTION
    376 
    377         return error_message
    378 
    379 
    380 class Position(object):
    381     """Holds the position of something."""
    382     def __init__(self, row, column):
    383         self.row = row
    384         self.column = column
    385 
    386     def __str__(self):
    387         return '(%s, %s)' % (self.row, self.column)
    388 
    389     def __cmp__(self, other):
    390         return self.row.__cmp__(other.row) or self.column.__cmp__(other.column)
    391 
    392 
    393 class Parameter(object):
    394     """Information about one function parameter."""
    395     def __init__(self, parameter, parameter_name_index, row):
    396         self.type = parameter[:parameter_name_index].strip()
    397         # Remove any initializers from the parameter name (e.g. int i = 5).
    398         self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip()
    399         self.row = row
    400 
    401     @memoized
    402     def lower_with_underscores_name(self):
    403         """Returns the parameter name in the lower with underscores format."""
    404         return _convert_to_lower_with_underscores(self.name)
    405 
    406 
    407 class SingleLineView(object):
    408     """Converts multiple lines into a single line (with line breaks replaced by a
    409        space) to allow for easier searching."""
    410     def __init__(self, lines, start_position, end_position):
    411         """Create a SingleLineView instance.
    412 
    413         Args:
    414           lines: a list of multiple lines to combine into a single line.
    415           start_position: offset within lines of where to start the single line.
    416           end_position: just after where to end (like a slice operation).
    417         """
    418         # Get the rows of interest.
    419         trimmed_lines = lines[start_position.row:end_position.row + 1]
    420 
    421         # Remove the columns on the last line that aren't included.
    422         trimmed_lines[-1] = trimmed_lines[-1][:end_position.column]
    423 
    424         # Remove the columns on the first line that aren't included.
    425         trimmed_lines[0] = trimmed_lines[0][start_position.column:]
    426 
    427         # Create a single line with all of the parameters.
    428         self.single_line = ' '.join(trimmed_lines)
    429 
    430         # Keep the row lengths, so we can calculate the original row number
    431         # given a column in the single line (adding 1 due to the space added
    432         # during the join).
    433         self._row_lengths = [len(line) + 1 for line in trimmed_lines]
    434         self._starting_row = start_position.row
    435 
    436     def convert_column_to_row(self, single_line_column_number):
    437         """Convert the column number from the single line into the original
    438         line number.
    439 
    440         Special cases:
    441         * Columns in the added spaces are considered part of the previous line.
    442         * Columns beyond the end of the line are consider part the last line
    443         in the view."""
    444         total_columns = 0
    445         row_offset = 0
    446         while row_offset < len(self._row_lengths) - 1 and single_line_column_number >= total_columns + self._row_lengths[row_offset]:
    447             total_columns += self._row_lengths[row_offset]
    448             row_offset += 1
    449         return self._starting_row + row_offset
    450 
    451 
    452 def create_skeleton_parameters(all_parameters):
    453     """Converts a parameter list to a skeleton version.
    454 
    455     The skeleton only has one word for the parameter name, one word for the type,
    456     and commas after each parameter and only there. Everything in the skeleton
    457     remains in the same columns as the original."""
    458     all_simplifications = (
    459         # Remove template parameters, function declaration parameters, etc.
    460         r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
    461         # Remove all initializers.
    462         r'=[^,]*',
    463         # Remove :: and everything before it.
    464         r'[^,]*::',
    465         # Remove modifiers like &, *.
    466         r'[&*]',
    467         # Remove const modifiers.
    468         r'\bconst\s+(?=[A-Za-z])',
    469         # Remove numerical modifiers like long.
    470         r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')
    471 
    472     skeleton_parameters = all_parameters
    473     for simplification in all_simplifications:
    474         skeleton_parameters = iteratively_replace_matches_with_char(simplification, ' ', skeleton_parameters)
    475     # If there are any parameters, then add a , after the last one to
    476     # make a regular pattern of a , following every parameter.
    477     if skeleton_parameters.strip():
    478         skeleton_parameters += ','
    479     return skeleton_parameters
    480 
    481 
    482 def find_parameter_name_index(skeleton_parameter):
    483     """Determines where the parametere name starts given the skeleton parameter."""
    484     # The first space from the right in the simplified parameter is where the parameter
    485     # name starts unless the first space is before any content in the simplified parameter.
    486     before_name_index = skeleton_parameter.rstrip().rfind(' ')
    487     if before_name_index != -1 and skeleton_parameter[:before_name_index].strip():
    488         return before_name_index + 1
    489     return len(skeleton_parameter)
    490 
    491 
    492 def parameter_list(elided_lines, start_position, end_position):
    493     """Generator for a function's parameters."""
    494     # Create new positions that omit the outer parenthesis of the parameters.
    495     start_position = Position(row=start_position.row, column=start_position.column + 1)
    496     end_position = Position(row=end_position.row, column=end_position.column - 1)
    497     single_line_view = SingleLineView(elided_lines, start_position, end_position)
    498     skeleton_parameters = create_skeleton_parameters(single_line_view.single_line)
    499     end_index = -1
    500 
    501     while True:
    502         # Find the end of the next parameter.
    503         start_index = end_index + 1
    504         end_index = skeleton_parameters.find(',', start_index)
    505 
    506         # No comma means that all parameters have been parsed.
    507         if end_index == -1:
    508             return
    509         row = single_line_view.convert_column_to_row(end_index)
    510 
    511         # Parse the parameter into a type and parameter name.
    512         skeleton_parameter = skeleton_parameters[start_index:end_index]
    513         name_offset = find_parameter_name_index(skeleton_parameter)
    514         parameter = single_line_view.single_line[start_index:end_index]
    515         yield Parameter(parameter, name_offset, row)
    516 
    517 
    518 class _FunctionState(object):
    519     """Tracks current function name and the number of lines in its body.
    520 
    521     Attributes:
    522       min_confidence: The minimum confidence level to use while checking style.
    523 
    524     """
    525 
    526     _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    527     _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    528 
    529     def __init__(self, min_confidence):
    530         self.min_confidence = min_confidence
    531         self.current_function = ''
    532         self.in_a_function = False
    533         self.lines_in_function = 0
    534         # Make sure these will not be mistaken for real positions (even when a
    535         # small amount is added to them).
    536         self.body_start_position = Position(-1000, 0)
    537         self.end_position = Position(-1000, 0)
    538 
    539     def begin(self, function_name, function_name_start_position, body_start_position, end_position,
    540               parameter_start_position, parameter_end_position, clean_lines):
    541         """Start analyzing function body.
    542 
    543         Args:
    544             function_name: The name of the function being tracked.
    545             function_name_start_position: Position in elided where the function name starts.
    546             body_start_position: Position in elided of the { or the ; for a prototype.
    547             end_position: Position in elided just after the final } (or ; is.
    548             parameter_start_position: Position in elided of the '(' for the parameters.
    549             parameter_end_position: Position in elided just after the ')' for the parameters.
    550             clean_lines: A CleansedLines instance containing the file.
    551         """
    552         self.in_a_function = True
    553         self.lines_in_function = -1  # Don't count the open brace line.
    554         self.current_function = function_name
    555         self.function_name_start_position = function_name_start_position
    556         self.body_start_position = body_start_position
    557         self.end_position = end_position
    558         self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
    559         self.parameter_start_position = parameter_start_position
    560         self.parameter_end_position = parameter_end_position
    561         self.is_pure = False
    562         if self.is_declaration:
    563             characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
    564             self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters))
    565         self._clean_lines = clean_lines
    566         self._parameter_list = None
    567 
    568     def modifiers_and_return_type(self):
    569         """Returns the modifiers and the return type."""
    570         # Go backwards from where the function name is until we encounter one of several things:
    571         #   ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0)
    572         elided = self._clean_lines.elided
    573         start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
    574                                           elided, self.parameter_start_position, Position(0, 0))
    575         return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip()
    576 
    577     def parameter_list(self):
    578         if not self._parameter_list:
    579             # Store the final result as a tuple since that is immutable.
    580             self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position))
    581 
    582         return self._parameter_list
    583 
    584     def count(self, line_number):
    585         """Count line in current function body."""
    586         if self.in_a_function and line_number >= self.body_start_position.row:
    587             self.lines_in_function += 1
    588 
    589     def check(self, error, line_number):
    590         """Report if too many lines in function body.
    591 
    592         Args:
    593           error: The function to call with any errors found.
    594           line_number: The number of the line to check.
    595         """
    596         if match(r'T(EST|est)', self.current_function):
    597             base_trigger = self._TEST_TRIGGER
    598         else:
    599             base_trigger = self._NORMAL_TRIGGER
    600         trigger = base_trigger * 2 ** self.min_confidence
    601 
    602         if self.lines_in_function > trigger:
    603             error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    604             # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    605             if error_level > 5:
    606                 error_level = 5
    607             error(line_number, 'readability/fn_size', error_level,
    608                   'Small and focused functions are preferred:'
    609                   ' %s has %d non-comment lines'
    610                   ' (error triggered by exceeding %d lines).'  % (
    611                       self.current_function, self.lines_in_function, trigger))
    612 
    613     def end(self):
    614         """Stop analyzing function body."""
    615         self.in_a_function = False
    616 
    617 
    618 class _IncludeError(Exception):
    619     """Indicates a problem with the include order in a file."""
    620     pass
    621 
    622 
    623 class FileInfo:
    624     """Provides utility functions for filenames.
    625 
    626     FileInfo provides easy access to the components of a file's path
    627     relative to the project root.
    628     """
    629 
    def __init__(self, filename):
        """Stores the path that the other methods operate on.

        Args:
          filename: The path of the file; it is kept exactly as given.
        """
        self._filename = filename
    632 
    633     def full_name(self):
    634         """Make Windows paths like Unix."""
    635         return os.path.abspath(self._filename).replace('\\', '/')
    636 
    637     def repository_name(self):
    638         """Full name after removing the local path to the repository.
    639 
    640         If we have a real absolute path name here we can try to do something smart:
    641         detecting the root of the checkout and truncating /path/to/checkout from
    642         the name so that we get header guards that don't include things like
    643         "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    644         people on different computers who have checked the source out to different
    645         locations won't see bogus errors.
    646         """
    647         fullname = self.full_name()
    648 
    649         if os.path.exists(fullname):
    650             project_dir = os.path.dirname(fullname)
    651 
    652             if os.path.exists(os.path.join(project_dir, ".svn")):
    653                 # If there's a .svn file in the current directory, we
    654                 # recursively look up the directory tree for the top
    655                 # of the SVN checkout
    656                 root_dir = project_dir
    657                 one_up_dir = os.path.dirname(root_dir)
    658                 while os.path.exists(os.path.join(one_up_dir, ".svn")):
    659                     root_dir = os.path.dirname(root_dir)
    660                     one_up_dir = os.path.dirname(one_up_dir)
    661 
    662                 prefix = os.path.commonprefix([root_dir, project_dir])
    663                 return fullname[len(prefix) + 1:]
    664 
    665             # Not SVN? Try to find a git top level directory by
    666             # searching up from the current path.
    667             root_dir = os.path.dirname(fullname)
    668             while (root_dir != os.path.dirname(root_dir)
    669                    and not os.path.exists(os.path.join(root_dir, ".git"))):
    670                 root_dir = os.path.dirname(root_dir)
    671                 if os.path.exists(os.path.join(root_dir, ".git")):
    672                     prefix = os.path.commonprefix([root_dir, project_dir])
    673                     return fullname[len(prefix) + 1:]
    674 
    675         # Don't know what to do; header guard warnings may be wrong...
    676         return fullname
    677 
    678     def split(self):
    679         """Splits the file into the directory, basename, and extension.
    680 
    681         For 'chrome/browser/browser.cpp', Split() would
    682         return ('chrome/browser', 'browser', '.cpp')
    683 
    684         Returns:
    685           A tuple of (directory, basename, extension).
    686         """
    687 
    688         googlename = self.repository_name()
    689         project, rest = os.path.split(googlename)
    690         return (project,) + os.path.splitext(rest)
    691 
    692     def base_name(self):
    693         """File base name - text after the final slash, before the final period."""
    694         return self.split()[1]
    695 
    696     def extension(self):
    697         """File extension - text following the final period."""
    698         return self.split()[2]
    699 
    700     def no_extension(self):
    701         """File has no source file extension."""
    702         return '/'.join(self.split()[0:2])
    703 
    704     def is_source(self):
    705         """File has a source file extension."""
    706         return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    707 
    708 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that begin and end on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to decide which surrounding spaces to remove so that comments
# inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-character
# on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    729 
    730 
    731 def is_cpp_string(line):
    732     """Does line terminate so, that the next symbol is in string constant.
    733 
    734     This function does not consider single-line nor multi-line comments.
    735 
    736     Args:
    737       line: is a partial line of code starting from the 0..n.
    738 
    739     Returns:
    740       True, if next character appended to 'line' is inside a
    741       string constant.
    742     """
    743 
    744     line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    745     return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    746 
    747 
    748 def find_next_multi_line_comment_start(lines, line_index):
    749     """Find the beginning marker for a multiline comment."""
    750     while line_index < len(lines):
    751         if lines[line_index].strip().startswith('/*'):
    752             # Only return this marker if the comment goes beyond this line
    753             if lines[line_index].strip().find('*/', 2) < 0:
    754                 return line_index
    755         line_index += 1
    756     return len(lines)
    757 
    758 
    759 def find_next_multi_line_comment_end(lines, line_index):
    760     """We are inside a comment, find the end marker."""
    761     while line_index < len(lines):
    762         if lines[line_index].strip().endswith('*/'):
    763             return line_index
    764         line_index += 1
    765     return len(lines)
    766 
    767 
    768 def remove_multi_line_comments_from_range(lines, begin, end):
    769     """Clears a range of lines for multi-line comments."""
    770     # Having // dummy comments makes the lines non-empty, so we will not get
    771     # unnecessary blank line warnings later in the code.
    772     for i in range(begin, end):
    773         lines[i] = '// dummy'
    774 
    775 
    776 def remove_multi_line_comments(lines, error):
    777     """Removes multiline (c-style) comments from lines."""
    778     line_index = 0
    779     while line_index < len(lines):
    780         line_index_begin = find_next_multi_line_comment_start(lines, line_index)
    781         if line_index_begin >= len(lines):
    782             return
    783         line_index_end = find_next_multi_line_comment_end(lines, line_index_begin)
    784         if line_index_end >= len(lines):
    785             error(line_index_begin + 1, 'readability/multiline_comment', 5,
    786                   'Could not find end of multi-line comment')
    787             return
    788         remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1)
    789         line_index = line_index_end + 1
    790 
    791 
    792 def cleanse_comments(line):
    793     """Removes //-comments and single-line C-style /* */ comments.
    794 
    795     Args:
    796       line: A line of C++ source.
    797 
    798     Returns:
    799       The line with single-line comments removed.
    800     """
    801     comment_position = line.find('//')
    802     if comment_position != -1 and not is_cpp_string(line[:comment_position]):
    803         line = line[:comment_position]
    804     # get rid of /* ... */
    805     return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
    806 
    807 
    808 class CleansedLines(object):
    809     """Holds 3 copies of all lines with different preprocessing applied to them.
    810 
    811     1) elided member contains lines without strings and comments,
    812     2) lines member contains lines without comments, and
    813     3) raw member contains all the lines without processing.
    814     All these three members are of <type 'list'>, and of the same length.
    815     """
    816 
    817     def __init__(self, lines):
    818         self.elided = []
    819         self.lines = []
    820         self.raw_lines = lines
    821         self._num_lines = len(lines)
    822         for line_number in range(len(lines)):
    823             self.lines.append(cleanse_comments(lines[line_number]))
    824             elided = self.collapse_strings(lines[line_number])
    825             self.elided.append(cleanse_comments(elided))
    826 
    827     def num_lines(self):
    828         """Returns the number of lines represented."""
    829         return self._num_lines
    830 
    831     @staticmethod
    832     def collapse_strings(elided):
    833         """Collapses strings and chars on a line to simple "" or '' blocks.
    834 
    835         We nix strings first so we're not fooled by text like '"http://"'
    836 
    837         Args:
    838           elided: The line being processed.
    839 
    840         Returns:
    841           The line with collapsed strings.
    842         """
    843         if not _RE_PATTERN_INCLUDE.match(elided):
    844             # Remove escaped characters first to make quote/single quote collapsing
    845             # basic.  Things that look like escaped characters shouldn't occur
    846             # outside of strings and chars.
    847             elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    848             elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
    849             elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    850         return elided
    851 
    852 
    853 def close_expression(elided, position):
    854     """If input points to ( or { or [, finds the position that closes it.
    855 
    856     If elided[position.row][position.column] points to a '(' or '{' or '[',
    857     finds the line_number/pos that correspond to the closing of the expression.
    858 
    859      Args:
    860        elided: A CleansedLines.elided instance containing the file.
    861        position: The position of the opening item.
    862 
    863      Returns:
    864       The Position *past* the closing brace, or Position(len(elided), -1)
    865       if we never find a close. Note we ignore strings and comments when matching.
    866     """
    867     line = elided[position.row]
    868     start_character = line[position.column]
    869     if start_character == '(':
    870         enclosing_character_regex = r'[\(\)]'
    871     elif start_character == '[':
    872         enclosing_character_regex = r'[\[\]]'
    873     elif start_character == '{':
    874         enclosing_character_regex = r'[\{\}]'
    875     else:
    876         return Position(len(elided), -1)
    877 
    878     current_column = position.column + 1
    879     line_number = position.row
    880     net_open = 1
    881     for line in elided[position.row:]:
    882         line = line[current_column:]
    883 
    884         # Search the current line for opening and closing characters.
    885         while True:
    886             next_enclosing_character = search(enclosing_character_regex, line)
    887             # No more on this line.
    888             if not next_enclosing_character:
    889                 break
    890             current_column += next_enclosing_character.end(0)
    891             line = line[next_enclosing_character.end(0):]
    892             if next_enclosing_character.group(0) == start_character:
    893                 net_open += 1
    894             else:
    895                 net_open -= 1
    896                 if not net_open:
    897                     return Position(line_number, current_column)
    898 
    899         # Proceed to the next line.
    900         line_number += 1
    901         current_column = 0
    902 
    903     # The given item was not closed.
    904     return Position(len(elided), -1)
    905 
    906 def check_for_copyright(lines, error):
    907     """Logs an error if no Copyright message appears at the top of the file."""
    908 
    909     # We'll say it should occur by line 10. Don't forget there's a
    910     # dummy line at the front.
    911     for line in xrange(1, min(len(lines), 11)):
    912         if re.search(r'Copyright', lines[line], re.I):
    913             break
    914     else:                       # means no copyright line was found
    915         error(0, 'legal/copyright', 5,
    916               'No copyright message found.  '
    917               'You should have a line: "Copyright [year] <Copyright Owner>"')
    918 
    919 
    920 def get_header_guard_cpp_variable(filename):
    921     """Returns the CPP variable that should be used as a header guard.
    922 
    923     Args:
    924       filename: The name of a C++ header file.
    925 
    926     Returns:
    927       The CPP variable that should be used as a header guard in the
    928       named file.
    929 
    930     """
    931 
    932     # Restores original filename in case that style checker is invoked from Emacs's
    933     # flymake.
    934     filename = re.sub(r'_flymake\.h$', '.h', filename)
    935 
    936     standard_name = sub(r'[-.\s]', '_', os.path.basename(filename))
    937 
    938     # Files under WTF typically have header guards that start with WTF_.
    939     if '/wtf/' in filename:
    940         special_name = "WTF_" + standard_name
    941     else:
    942         special_name = standard_name
    943     return (special_name, standard_name)
    944 
    945 
    946 def check_for_header_guard(filename, lines, error):
    947     """Checks that the file contains a header guard.
    948 
    949     Logs an error if no #ifndef header guard is present.  For other
    950     headers, checks that the full pathname is used.
    951 
    952     Args:
    953       filename: The name of the C++ header file.
    954       lines: An array of strings, each representing a line of the file.
    955       error: The function to call with any errors found.
    956     """
    957 
    958     cppvar = get_header_guard_cpp_variable(filename)
    959 
    960     ifndef = None
    961     ifndef_line_number = 0
    962     define = None
    963     for line_number, line in enumerate(lines):
    964         line_split = line.split()
    965         if len(line_split) >= 2:
    966             # find the first occurrence of #ifndef and #define, save arg
    967             if not ifndef and line_split[0] == '#ifndef':
    968                 # set ifndef to the header guard presented on the #ifndef line.
    969                 ifndef = line_split[1]
    970                 ifndef_line_number = line_number
    971             if not define and line_split[0] == '#define':
    972                 define = line_split[1]
    973             if define and ifndef:
    974                 break
    975 
    976     if not ifndef or not define or ifndef != define:
    977         error(0, 'build/header_guard', 5,
    978               'No #ifndef header guard found, suggested CPP variable is: %s' %
    979               cppvar[0])
    980         return
    981 
    982     # The guard should be File_h.
    983     if ifndef not in cppvar:
    984         error(ifndef_line_number, 'build/header_guard', 5,
    985               '#ifndef header guard has wrong style, please use: %s' % cppvar[0])
    986 
    987 
    988 def check_for_unicode_replacement_characters(lines, error):
    989     """Logs an error for each line containing Unicode replacement characters.
    990 
    991     These indicate that either the file contained invalid UTF-8 (likely)
    992     or Unicode replacement characters (which it shouldn't).  Note that
    993     it's possible for this to throw off line numbering if the invalid
    994     UTF-8 occurred adjacent to a newline.
    995 
    996     Args:
    997       lines: An array of strings, each representing a line of the file.
    998       error: The function to call with any errors found.
    999     """
   1000     for line_number, line in enumerate(lines):
   1001         if u'\ufffd' in line:
   1002             error(line_number, 'readability/utf8', 5,
   1003                   'Line contains invalid UTF-8 (or Unicode replacement character).')
   1004 
   1005 
   1006 def check_for_new_line_at_eof(lines, error):
   1007     """Logs an error if there is no newline char at the end of the file.
   1008 
   1009     Args:
   1010       lines: An array of strings, each representing a line of the file.
   1011       error: The function to call with any errors found.
   1012     """
   1013 
   1014     # The array lines() was created by adding two newlines to the
   1015     # original file (go figure), then splitting on \n.
   1016     # To verify that the file ends in \n, we just have to make sure the
   1017     # last-but-two element of lines() exists and is empty.
   1018     if len(lines) < 3 or lines[-2]:
   1019         error(len(lines) - 2, 'whitespace/ending_newline', 5,
   1020               'Could not find a newline character at the end of the file.')
   1021 
   1022 
   1023 def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
   1024     """Logs an error if we see /* ... */ or "..." that extend past one line.
   1025 
   1026     /* ... */ comments are legit inside macros, for one line.
   1027     Otherwise, we prefer // comments, so it's ok to warn about the
   1028     other.  Likewise, it's ok for strings to extend across multiple
   1029     lines, as long as a line continuation character (backslash)
   1030     terminates each line. Although not currently prohibited by the C++
   1031     style guide, it's ugly and unnecessary. We don't do well with either
   1032     in this lint program, so we warn about both.
   1033 
   1034     Args:
   1035       clean_lines: A CleansedLines instance containing the file.
   1036       line_number: The number of the line to check.
   1037       error: The function to call with any errors found.
   1038     """
   1039     line = clean_lines.elided[line_number]
   1040 
   1041     # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1042     # second (escaped) slash may trigger later \" detection erroneously.
   1043     line = line.replace('\\\\', '')
   1044 
   1045     if line.count('/*') > line.count('*/'):
   1046         error(line_number, 'readability/multiline_comment', 5,
   1047               'Complex multi-line /*...*/-style comment found. '
   1048               'Lint may give bogus warnings.  '
   1049               'Consider replacing these with //-style comments, '
   1050               'with #if 0...#endif, '
   1051               'or with more clearly structured multi-line comments.')
   1052 
   1053     if (line.count('"') - line.count('\\"')) % 2:
   1054         error(line_number, 'readability/multiline_string', 5,
   1055               'Multi-line string ("...") found.  This lint script doesn\'t '
   1056               'do well with such strings, and may give bogus warnings.  They\'re '
   1057               'ugly and unnecessary, and you should use concatenation instead".')
   1058 
   1059 
   1060 _THREADING_LIST = (
   1061     ('asctime(', 'asctime_r('),
   1062     ('ctime(', 'ctime_r('),
   1063     ('getgrgid(', 'getgrgid_r('),
   1064     ('getgrnam(', 'getgrnam_r('),
   1065     ('getlogin(', 'getlogin_r('),
   1066     ('getpwnam(', 'getpwnam_r('),
   1067     ('getpwuid(', 'getpwuid_r('),
   1068     ('gmtime(', 'gmtime_r('),
   1069     ('localtime(', 'localtime_r('),
   1070     ('rand(', 'rand_r('),
   1071     ('readdir(', 'readdir_r('),
   1072     ('strtok(', 'strtok_r('),
   1073     ('ttyname(', 'ttyname_r('),
   1074     )
   1075 
   1076 
   1077 def check_posix_threading(clean_lines, line_number, error):
   1078     """Checks for calls to thread-unsafe functions.
   1079 
   1080     Much code has been originally written without consideration of
   1081     multi-threading. Also, engineers are relying on their old experience;
   1082     they have learned posix before threading extensions were added. These
   1083     tests guide the engineers to use thread-safe functions (when using
   1084     posix directly).
   1085 
   1086     Args:
   1087       clean_lines: A CleansedLines instance containing the file.
   1088       line_number: The number of the line to check.
   1089       error: The function to call with any errors found.
   1090     """
   1091     line = clean_lines.elided[line_number]
   1092     for single_thread_function, multithread_safe_function in _THREADING_LIST:
   1093         index = line.find(single_thread_function)
   1094         # Comparisons made explicit for clarity
   1095         if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
   1096                                           and line[index - 1] not in ('_', '.', '>'))):
   1097             error(line_number, 'runtime/threadsafe_fn', 2,
   1098                   'Consider using ' + multithread_safe_function +
   1099                   '...) instead of ' + single_thread_function +
   1100                   '...) for improved thread safety.')
   1101 
   1102 
   1103 # Matches invalid increment: *count++, which moves pointer instead of
   1104 # incrementing a value.
   1105 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   1106     r'^\s*\*\w+(\+\+|--);')
   1107 
   1108 
   1109 def check_invalid_increment(clean_lines, line_number, error):
   1110     """Checks for invalid increment *count++.
   1111 
   1112     For example following function:
   1113     void increment_counter(int* count) {
   1114         *count++;
   1115     }
   1116     is invalid, because it effectively does count++, moving pointer, and should
   1117     be replaced with ++*count, (*count)++ or *count += 1.
   1118 
   1119     Args:
   1120       clean_lines: A CleansedLines instance containing the file.
   1121       line_number: The number of the line to check.
   1122       error: The function to call with any errors found.
   1123     """
   1124     line = clean_lines.elided[line_number]
   1125     if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1126         error(line_number, 'runtime/invalid_increment', 5,
   1127               'Changing pointer instead of value (or unused value of operator*).')
   1128 
   1129 
   1130 class _ClassInfo(object):
   1131     """Stores information about a class."""
   1132 
   1133     def __init__(self, name, line_number):
   1134         self.name = name
   1135         self.line_number = line_number
   1136         self.seen_open_brace = False
   1137         self.is_derived = False
   1138         self.virtual_method_line_number = None
   1139         self.has_virtual_destructor = False
   1140         self.brace_depth = 0
   1141         self.unsigned_bitfields = []
   1142         self.bool_bitfields = []
   1143 
   1144 
   1145 class _ClassState(object):
   1146     """Holds the current state of the parse relating to class declarations.
   1147 
   1148     It maintains a stack of _ClassInfos representing the parser's guess
   1149     as to the current nesting of class declarations. The innermost class
   1150     is at the top (back) of the stack. Typically, the stack will either
   1151     be empty or have exactly one entry.
   1152     """
   1153 
   1154     def __init__(self):
   1155         self.classinfo_stack = []
   1156 
   1157     def check_finished(self, error):
   1158         """Checks that all classes have been completely parsed.
   1159 
   1160         Call this when all lines in a file have been processed.
   1161         Args:
   1162           error: The function to call with any errors found.
   1163         """
   1164         if self.classinfo_stack:
   1165             # Note: This test can result in false positives if #ifdef constructs
   1166             # get in the way of brace matching. See the testBuildClass test in
   1167             # cpp_style_unittest.py for an example of this.
   1168             error(self.classinfo_stack[0].line_number, 'build/class', 5,
   1169                   'Failed to find complete declaration of class %s' %
   1170                   self.classinfo_stack[0].name)
   1171 
   1172 
   1173 class _FileState(object):
   1174     def __init__(self, clean_lines, file_extension):
   1175         self._did_inside_namespace_indent_warning = False
   1176         self._clean_lines = clean_lines
   1177         if file_extension in ['m', 'mm']:
   1178             self._is_objective_c = True
   1179             self._is_c = False
   1180         elif file_extension == 'h':
   1181             # In the case of header files, it is unknown if the file
   1182             # is c / objective c or not, so set this value to None and then
   1183             # if it is requested, use heuristics to guess the value.
   1184             self._is_objective_c = None
   1185             self._is_c = None
   1186         elif file_extension == 'c':
   1187             self._is_c = True
   1188             self._is_objective_c = False
   1189         else:
   1190             self._is_objective_c = False
   1191             self._is_c = False
   1192 
   1193     def set_did_inside_namespace_indent_warning(self):
   1194         self._did_inside_namespace_indent_warning = True
   1195 
   1196     def did_inside_namespace_indent_warning(self):
   1197         return self._did_inside_namespace_indent_warning
   1198 
   1199     def is_objective_c(self):
   1200         if self._is_objective_c is None:
   1201             for line in self._clean_lines.elided:
   1202                 # Starting with @ or #import seem like the best indications
   1203                 # that we have an Objective C file.
   1204                 if line.startswith("@") or line.startswith("#import"):
   1205                     self._is_objective_c = True
   1206                     break
   1207             else:
   1208                 self._is_objective_c = False
   1209         return self._is_objective_c
   1210 
   1211     def is_c(self):
   1212         if self._is_c is None:
   1213             for line in self._clean_lines.lines:
   1214                 # if extern "C" is found, then it is a good indication
   1215                 # that we have a C header file.
   1216                 if line.startswith('extern "C"'):
   1217                     self._is_c = True
   1218                     break
   1219             else:
   1220                 self._is_c = False
   1221         return self._is_c
   1222 
   1223     def is_c_or_objective_c(self):
   1224         """Return whether the file extension corresponds to C or Objective-C."""
   1225         return self.is_c() or self.is_objective_c()
   1226 
   1227 
class _EnumState(object):
    """Maintains whether currently in an enum declaration, and checks whether
    enum declarations follow the style guide.
    """

    def __init__(self):
        # True while the lines being processed are inside a multi-line enum.
        self.in_enum_decl = False
        # Within this class the flag is only ever reset to False;
        # presumably a caller sets it to True for WebIDL enums -- verify.
        self.is_webidl_enum = False

    def process_clean_line(self, line):
        """Updates the enum state with one line and checks its enumerators.

        Args:
          line: The line to process (presumably with comments and strings
                already removed -- confirm against the caller).

        Returns:
          False if the line contains an enumerator that is rejected: one
          that starts with a lowercase letter, or one written entirely in
          uppercase letters, digits and underscores while is_webidl_enum
          is not set.  True otherwise, including for every line that is
          not recognized as part of an enum.
        """
        # FIXME: The regular expressions for expr_all_uppercase and expr_enum_end only accept integers
        # and identifiers for the value of the enumerator, but do not accept any other constant
        # expressions. However, this is sufficient for now (11/27/2012).
        expr_all_uppercase = r'\s*[A-Z0-9_]+\s*(?:=\s*[a-zA-Z0-9]+\s*)?,?\s*$'
        expr_starts_lowercase = r'\s*[a-z]'
        expr_enum_end = r'}\s*(?:[a-zA-Z0-9]+\s*(?:=\s*[a-zA-Z0-9]+)?)?\s*;\s*'
        expr_enum_start = r'\s*enum(?:\s+[a-zA-Z0-9]+)?\s*\{?\s*'
        if self.in_enum_decl:
            # Inside a multi-line enum: the line either closes the enum or
            # is examined as a single enumerator.
            if match(r'\s*' + expr_enum_end + r'$', line):
                self.in_enum_decl = False
                self.is_webidl_enum = False
            elif match(expr_all_uppercase, line):
                # All-uppercase enumerators are only accepted when the
                # is_webidl_enum flag is set.
                return self.is_webidl_enum
            elif match(expr_starts_lowercase, line):
                return False
        else:
            # A line holding nothing but the start of an enum opens a
            # multi-line declaration.
            matched = match(expr_enum_start + r'$', line)
            if matched:
                self.in_enum_decl = True
            else:
                # Otherwise look for an enum declared entirely on this line
                # and check each of its comma-separated members.
                matched = match(expr_enum_start + r'(?P<members>.*)' + expr_enum_end + r'$', line)
                if matched:
                    members = matched.group('members').split(',')
                    found_invalid_member = False
                    for member in members:
                        if match(expr_all_uppercase, member):
                            found_invalid_member = not self.is_webidl_enum
                        if match(expr_starts_lowercase, member):
                            found_invalid_member = True
                        if found_invalid_member:
                            self.is_webidl_enum = False
                            return False
                    return True
        return True
   1271         return True
   1272 
   1273 def check_for_non_standard_constructs(clean_lines, line_number,
   1274                                       class_state, error):
   1275     """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   1276 
   1277     Complain about several constructs which gcc-2 accepts, but which are
   1278     not standard C++.  Warning about these in lint is one way to ease the
   1279     transition to new compilers.
   1280     - put storage class first (e.g. "static const" instead of "const static").
   1281     - "%lld" instead of %qd" in printf-type functions.
   1282     - "%1$d" is non-standard in printf-type functions.
   1283     - "\%" is an undefined character escape sequence.
   1284     - text after #endif is not allowed.
   1285     - invalid inner-style forward declaration.
   1286     - >? and <? operators, and their >?= and <?= cousins.
   1287     - classes with virtual methods need virtual destructors (compiler warning
   1288         available, but not turned on yet.)
   1289 
   1290     Additionally, check for constructor/destructor style violations as it
   1291     is very convenient to do so while checking for gcc-2 compliance.
   1292 
   1293     Args:
   1294       clean_lines: A CleansedLines instance containing the file.
   1295       line_number: The number of the line to check.
   1296       class_state: A _ClassState instance which maintains information about
   1297                    the current stack of nested class declarations being parsed.
   1298       error: A callable to which errors are reported, which takes parameters:
   1299              line number, error level, and message
   1300     """
   1301 
   1302     # Remove comments from the line, but leave in strings for now.
   1303     line = clean_lines.lines[line_number]
   1304 
   1305     if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   1306         error(line_number, 'runtime/printf_format', 3,
   1307               '%q in format strings is deprecated.  Use %ll instead.')
   1308 
   1309     if search(r'printf\s*\(.*".*%\d+\$', line):
   1310         error(line_number, 'runtime/printf_format', 2,
   1311               '%N$ formats are unconventional.  Try rewriting to avoid them.')
   1312 
   1313     # Remove escaped backslashes before looking for undefined escapes.
   1314     line = line.replace('\\\\', '')
   1315 
   1316     if search(r'("|\').*\\(%|\[|\(|{)', line):
   1317         error(line_number, 'build/printf_format', 3,
   1318               '%, [, (, and { are undefined character escapes.  Unescape them.')
   1319 
   1320     # For the rest, work with both comments and strings removed.
   1321     line = clean_lines.elided[line_number]
   1322 
   1323     if search(r'\b(const|volatile|void|char|short|int|long'
   1324               r'|float|double|signed|unsigned'
   1325               r'|schar|u?int8|u?int16|u?int32|u?int64)'
   1326               r'\s+(auto|register|static|extern|typedef)\b',
   1327               line):
   1328         error(line_number, 'build/storage_class', 5,
   1329               'Storage class (static, extern, typedef, etc) should be first.')
   1330 
   1331     if match(r'\s*#\s*endif\s*[^/\s]+', line):
   1332         error(line_number, 'build/endif_comment', 5,
   1333               'Uncommented text after #endif is non-standard.  Use a comment.')
   1334 
   1335     if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   1336         error(line_number, 'build/forward_decl', 5,
   1337               'Inner-style forward declarations are invalid.  Remove this line.')
   1338 
   1339     if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
   1340         error(line_number, 'build/deprecated', 3,
   1341               '>? and <? (max and min) operators are non-standard and deprecated.')
   1342 
   1343     # Track class entry and exit, and attempt to find cases within the
   1344     # class declaration that don't meet the C++ style
   1345     # guidelines. Tracking is very dependent on the code matching Google
   1346     # style guidelines, but it seems to perform well enough in testing
   1347     # to be a worthwhile addition to the checks.
   1348     classinfo_stack = class_state.classinfo_stack
   1349     # Look for a class declaration
   1350     class_decl_match = match(
   1351         r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
   1352     if class_decl_match:
   1353         classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))
   1354 
   1355     # Everything else in this function uses the top of the stack if it's
   1356     # not empty.
   1357     if not classinfo_stack:
   1358         return
   1359 
   1360     classinfo = classinfo_stack[-1]
   1361 
   1362     # If the opening brace hasn't been seen look for it and also
   1363     # parent class declarations.
   1364     if not classinfo.seen_open_brace:
   1365         # If the line has a ';' in it, assume it's a forward declaration or
   1366         # a single-line class declaration, which we won't process.
   1367         if line.find(';') != -1:
   1368             classinfo_stack.pop()
   1369             return
   1370         classinfo.seen_open_brace = (line.find('{') != -1)
   1371         # Look for a bare ':'
   1372         if search('(^|[^:]):($|[^:])', line):
   1373             classinfo.is_derived = True
   1374         if not classinfo.seen_open_brace:
   1375             return  # Everything else in this function is for after open brace
   1376 
   1377     # The class may have been declared with namespace or classname qualifiers.
   1378     # The constructor and destructor will not have those qualifiers.
   1379     base_classname = classinfo.name.split('::')[-1]
   1380 
   1381     # Look for single-argument constructors that aren't marked explicit.
   1382     # Technically a valid construct, but against style.
   1383     args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
   1384                  % re.escape(base_classname),
   1385                  line)
   1386     if (args
   1387         and args.group(1) != 'void'
   1388         and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
   1389                       args.group(1).strip())):
   1390         error(line_number, 'runtime/explicit', 5,
   1391               'Single-argument constructors should be marked explicit.')
   1392 
   1393     # Look for methods declared virtual.
   1394     if search(r'\bvirtual\b', line):
   1395         classinfo.virtual_method_line_number = line_number
   1396         # Only look for a destructor declaration on the same line. It would
   1397         # be extremely unlikely for the destructor declaration to occupy
   1398         # more than one line.
   1399         if search(r'~%s\s*\(' % base_classname, line):
   1400             classinfo.has_virtual_destructor = True
   1401 
   1402     # Look for class end.
   1403     brace_depth = classinfo.brace_depth
   1404     brace_depth = brace_depth + line.count('{') - line.count('}')
   1405     if brace_depth <= 0:
   1406         classinfo = classinfo_stack.pop()
   1407         # Try to detect missing virtual destructor declarations.
   1408         # For now, only warn if a non-derived class with virtual methods lacks
   1409         # a virtual destructor. This is to make it less likely that people will
   1410         # declare derived virtual destructors without declaring the base
   1411         # destructor virtual.
   1412         if ((classinfo.virtual_method_line_number is not None)
   1413             and (not classinfo.has_virtual_destructor)
   1414             and (not classinfo.is_derived)):  # Only warn for base classes
   1415             error(classinfo.line_number, 'runtime/virtual', 4,
   1416                   'The class %s probably needs a virtual destructor due to '
   1417                   'having virtual method(s), one declared at line %d.'
   1418                   % (classinfo.name, classinfo.virtual_method_line_number))
   1419         # Look for mixed bool and unsigned bitfields.
   1420         if (classinfo.bool_bitfields and classinfo.unsigned_bitfields):
   1421             bool_list = ', '.join(classinfo.bool_bitfields)
   1422             unsigned_list = ', '.join(classinfo.unsigned_bitfields)
   1423             error(classinfo.line_number, 'runtime/bitfields', 5,
   1424                   'The class %s contains mixed unsigned and bool bitfields, '
   1425                   'which will pack into separate words on the MSVC compiler.\n'
   1426                   'Bool bitfields are [%s].\nUnsigned bitfields are [%s].\n'
   1427                   'Consider converting bool bitfields to unsigned.'
   1428                   % (classinfo.name, bool_list, unsigned_list))
   1429     else:
   1430         classinfo.brace_depth = brace_depth
   1431 
   1432     well_typed_bitfield = False;
   1433     # Look for bool <name> : 1 declarations.
   1434     args = search(r'\bbool\s+(\S*)\s*:\s*\d+\s*;', line)
   1435     if args:
   1436         classinfo.bool_bitfields.append('%d: %s' % (line_number, args.group(1)))
   1437         well_typed_bitfield = True;
   1438 
   1439     # Look for unsigned <name> : n declarations.
   1440     args = search(r'\bunsigned\s+(?:int\s+)?(\S+)\s*:\s*\d+\s*;', line)
   1441     if args:
   1442         classinfo.unsigned_bitfields.append('%d: %s' % (line_number, args.group(1)))
   1443         well_typed_bitfield = True;
   1444 
   1445     # Look for other bitfield declarations. We don't care about those in
   1446     # size-matching structs.
   1447     if not (well_typed_bitfield or classinfo.name.startswith('SameSizeAs') or
   1448             classinfo.name.startswith('Expected')):
   1449         args = match(r'\s*(\S+)\s+(\S+)\s*:\s*\d+\s*;', line)
   1450         if args:
   1451             error(line_number, 'runtime/bitfields', 4,
   1452                   'Member %s of class %s defined as a bitfield of type %s. '
   1453                   'Please declare all bitfields as unsigned.'
   1454                   % (args.group(2), classinfo.name, args.group(1)))
   1455 
   1456 def check_spacing_for_function_call(line, line_number, error):
   1457     """Checks for the correctness of various spacing around function calls.
   1458 
   1459     Args:
   1460       line: The text of the line to check.
   1461       line_number: The number of the line to check.
   1462       error: The function to call with any errors found.
   1463     """
   1464 
   1465     # Since function calls often occur inside if/for/foreach/while/switch
   1466     # expressions - which have their own, more liberal conventions - we
   1467     # first see if we should be looking inside such an expression for a
   1468     # function call, to which we can apply more strict standards.
   1469     function_call = line    # if there's no control flow construct, look at whole line
   1470     for pattern in (r'\bif\s*\((.*)\)\s*{',
   1471                     r'\bfor\s*\((.*)\)\s*{',
   1472                     r'\bforeach\s*\((.*)\)\s*{',
   1473                     r'\bwhile\s*\((.*)\)\s*[{;]',
   1474                     r'\bswitch\s*\((.*)\)\s*{'):
   1475         matched = search(pattern, line)
   1476         if matched:
   1477             function_call = matched.group(1)    # look inside the parens for function calls
   1478             break
   1479 
   1480     # Except in if/for/foreach/while/switch, there should never be space
   1481     # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1482     # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1483     # a space before a ( when it's a function argument.  I assume it's a
   1484     # function argument when the char before the whitespace is legal in
   1485     # a function name (alnum + _) and we're not starting a macro. Also ignore
   1486     # pointers and references to arrays and functions coz they're too tricky:
   1487     # we use a very simple way to recognize these:
   1488     # " (something)(maybe-something)" or
   1489     # " (something)(maybe-something," or
   1490     # " (something)[something]"
   1491     # Note that we assume the contents of [] to be short enough that
   1492     # they'll never need to wrap.
   1493     if (  # Ignore control structures.
   1494         not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
   1495         # Ignore pointers/references to functions.
   1496         and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
   1497         # Ignore pointers/references to arrays.
   1498         and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
   1499         if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):      # a ( used for a fn call
   1500             error(line_number, 'whitespace/parens', 4,
   1501                   'Extra space after ( in function call')
   1502         elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
   1503             error(line_number, 'whitespace/parens', 2,
   1504                   'Extra space after (')
   1505         if (search(r'\w\s+\(', function_call)
   1506             and not match(r'\s*(#|typedef)', function_call)):
   1507             error(line_number, 'whitespace/parens', 4,
   1508                   'Extra space before ( in function call')
   1509         # If the ) is followed only by a newline or a { + newline, assume it's
   1510         # part of a control statement (if/while/etc), and don't complain
   1511         if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
   1512             error(line_number, 'whitespace/parens', 2,
   1513                   'Extra space before )')
   1514 
   1515 
   1516 def is_blank_line(line):
   1517     """Returns true if the given line is blank.
   1518 
   1519     We consider a line to be blank if the line is empty or consists of
   1520     only white spaces.
   1521 
   1522     Args:
   1523       line: A line of a string.
   1524 
   1525     Returns:
   1526       True, if the given line is blank.
   1527     """
   1528     return not line or line.isspace()
   1529 
   1530 
   1531 def detect_functions(clean_lines, line_number, function_state, error):
   1532     """Finds where functions start and end.
   1533 
   1534     Uses a simplistic algorithm assuming other style guidelines
   1535     (especially spacing) are followed.
   1536     Trivial bodies are unchecked, so constructors with huge initializer lists
   1537     may be missed.
   1538 
   1539     Args:
   1540       clean_lines: A CleansedLines instance containing the file.
   1541       line_number: The number of the line to check.
   1542       function_state: Current function name and lines in body so far.
   1543       error: The function to call with any errors found.
   1544     """
   1545     # Are we now past the end of a function?
   1546     if function_state.end_position.row + 1 == line_number:
   1547         function_state.end()
   1548 
   1549     # If we're in a function, don't try to detect a new one.
   1550     if function_state.in_a_function:
   1551         return
   1552 
   1553     lines = clean_lines.lines
   1554     line = lines[line_number]
   1555     raw = clean_lines.raw_lines
   1556     raw_line = raw[line_number]
   1557 
   1558     # Lines ending with a \ indicate a macro. Don't try to check them.
   1559     if raw_line.endswith('\\'):
   1560         return
   1561 
   1562     regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\('  # decls * & space::name( ...
   1563     match_result = match(regexp, line)
   1564     if not match_result:
   1565         return
   1566 
   1567     # If the name is all caps and underscores, figure it's a macro and
   1568     # ignore it, unless it's TEST or TEST_F.
   1569     function_name = match_result.group(1).split()[-1]
   1570     if function_name != 'TEST' and function_name != 'TEST_F' and match(r'[A-Z_]+$', function_name):
   1571         return
   1572 
   1573     joined_line = ''
   1574     for start_line_number in xrange(line_number, clean_lines.num_lines()):
   1575         start_line = clean_lines.elided[start_line_number]
   1576         joined_line += ' ' + start_line.lstrip()
   1577         body_match = search(r'{|;', start_line)
   1578         if body_match:
   1579             body_start_position = Position(start_line_number, body_match.start(0))
   1580 
   1581             # Replace template constructs with _ so that no spaces remain in the function name,
   1582             # while keeping the column numbers of other characters the same as "line".
   1583             line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line)
   1584             match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates)
   1585             if not match_function:
   1586                 return  # The '(' must have been inside of a template.
   1587 
   1588             # Use the column numbers from the modified line to find the
   1589             # function name in the original line.
   1590             function = line[match_function.start(1):match_function.end(1)]
   1591             function_name_start_position = Position(line_number, match_function.start(1))
   1592 
   1593             if match(r'TEST', function):    # Handle TEST... macros
   1594                 parameter_regexp = search(r'(\(.*\))', joined_line)
   1595                 if parameter_regexp:             # Ignore bad syntax
   1596                     function += parameter_regexp.group(1)
   1597             else:
   1598                 function += '()'
   1599 
   1600             parameter_start_position = Position(line_number, match_function.end(1))
   1601             parameter_end_position = close_expression(clean_lines.elided, parameter_start_position)
   1602             if parameter_end_position.row == len(clean_lines.elided):
   1603                 # No end was found.
   1604                 return
   1605 
   1606             if start_line[body_start_position.column] == ';':
   1607                 end_position = Position(body_start_position.row, body_start_position.column + 1)
   1608             else:
   1609                 end_position = close_expression(clean_lines.elided, body_start_position)
   1610 
   1611             # Check for nonsensical positions. (This happens in test cases which check code snippets.)
   1612             if parameter_end_position > body_start_position:
   1613                 return
   1614 
   1615             function_state.begin(function, function_name_start_position, body_start_position, end_position,
   1616                                  parameter_start_position, parameter_end_position, clean_lines)
   1617             return
   1618 
   1619     # No body for the function (or evidence of a non-function) was found.
   1620     error(line_number, 'readability/fn_size', 5,
   1621           'Lint failed to find start of function body.')
   1622 
   1623 
   1624 def check_for_function_lengths(clean_lines, line_number, function_state, error):
   1625     """Reports for long function bodies.
   1626 
   1627     For an overview why this is done, see:
   1628     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1629 
   1630     Blank/comment lines are not counted so as to avoid encouraging the removal
   1631     of vertical space and commments just to get through a lint check.
   1632     NOLINT *on the last line of a function* disables this check.
   1633 
   1634     Args:
   1635       clean_lines: A CleansedLines instance containing the file.
   1636       line_number: The number of the line to check.
   1637       function_state: Current function name and lines in body so far.
   1638       error: The function to call with any errors found.
   1639     """
   1640     lines = clean_lines.lines
   1641     line = lines[line_number]
   1642     raw = clean_lines.raw_lines
   1643     raw_line = raw[line_number]
   1644 
   1645     if function_state.end_position.row == line_number:  # last line
   1646         if not search(r'\bNOLINT\b', raw_line):
   1647             function_state.check(error, line_number)
   1648     elif not match(r'^\s*$', line):
   1649         function_state.count(line_number)  # Count non-blank/non-comment lines.
   1650 
   1651 
   1652 def _check_parameter_name_against_text(parameter, text, error):
   1653     """Checks to see if the parameter name is contained within the text.
   1654 
   1655     Return false if the check failed (i.e. an error was produced).
   1656     """
   1657 
   1658     # Treat 'lower with underscores' as a canonical form because it is
   1659     # case insensitive while still retaining word breaks. (This ensures that
   1660     # 'elate' doesn't look like it is duplicating of 'NateLate'.)
   1661     canonical_parameter_name = parameter.lower_with_underscores_name()
   1662 
   1663     # Appends "object" to all text to catch variables that did the same (but only
   1664     # do this when the parameter name is more than a single character to avoid
   1665     # flagging 'b' which may be an ok variable when used in an rgba function).
   1666     if len(canonical_parameter_name) > 1:
   1667         text = sub(r'(\w)\b', r'\1Object', text)
   1668     canonical_text = _convert_to_lower_with_underscores(text)
   1669 
   1670     # Used to detect cases like ec for ExceptionCode.
   1671     acronym = _create_acronym(text).lower()
   1672     if canonical_text.find(canonical_parameter_name) != -1 or acronym.find(canonical_parameter_name) != -1:
   1673         error(parameter.row, 'readability/parameter_name', 5,
   1674               'The parameter name "%s" adds no information, so it should be removed.' % parameter.name)
   1675         return False
   1676     return True
   1677 
   1678 
   1679 def check_function_definition_and_pass_ptr(type_text, row, location_description, error):
   1680     """Check that function definitions for use Pass*Ptr instead of *Ptr.
   1681 
   1682     Args:
   1683        type_text: A string containing the type. (For return values, it may contain more than the type.)
   1684        row: The row number of the type.
   1685        location_description: Used to indicate where the type is. This is either 'parameter' or 'return'.
   1686        error: The function to call with any errors found.
   1687     """
   1688     match_ref_or_own_ptr = '(?=\W|^)(Ref|Own)Ptr(?=\W)'
   1689     exceptions = '(?:&|\*|\*\s*=\s*0)$'
   1690     bad_type_usage = search(match_ref_or_own_ptr, type_text)
   1691     exception_usage = search(exceptions, type_text)
   1692     if not bad_type_usage or exception_usage:
   1693         return
   1694     type_name = bad_type_usage.group(0)
   1695     error(row, 'readability/pass_ptr', 5,
   1696           'The %s type should use Pass%s instead of %s.' % (location_description, type_name, type_name))
   1697 
   1698 
   1699 def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error):
   1700     """Check that function definitions for style issues.
   1701 
   1702     Specifically, check that parameter names in declarations add information.
   1703 
   1704     Args:
   1705        filename: Filename of the file that is being processed.
   1706        file_extension: The current file extension, without the leading dot.
   1707        clean_lines: A CleansedLines instance containing the file.
   1708        line_number: The number of the line to check.
   1709        function_state: Current function name and lines in body so far.
   1710        error: The function to call with any errors found.
   1711     """
   1712     if line_number != function_state.body_start_position.row:
   1713         return
   1714 
   1715     modifiers_and_return_type = function_state.modifiers_and_return_type()
   1716     if filename.find('/chromium/') != -1 and search(r'\bWEBKIT_EXPORT\b', modifiers_and_return_type):
   1717         if filename.find('/chromium/public/') == -1 and filename.find('/chromium/tests/') == -1 and filename.find('chromium/platform') == -1:
   1718             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1719                   'WEBKIT_EXPORT should only appear in the chromium public (or tests) directory.')
   1720         elif not file_extension == "h":
   1721             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1722                   'WEBKIT_EXPORT should only be used in header files.')
   1723         elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type):
   1724             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1725                   'WEBKIT_EXPORT should not be used on a function with a body.')
   1726         elif function_state.is_pure:
   1727             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1728                   'WEBKIT_EXPORT should not be used with a pure virtual function.')
   1729 
   1730     check_function_definition_and_pass_ptr(modifiers_and_return_type, function_state.function_name_start_position.row, 'return', error)
   1731 
   1732     parameter_list = function_state.parameter_list()
   1733     for parameter in parameter_list:
   1734         check_function_definition_and_pass_ptr(parameter.type, parameter.row, 'parameter', error)
   1735 
   1736         # Do checks specific to function declarations and parameter names.
   1737         if not function_state.is_declaration or not parameter.name:
   1738             continue
   1739 
   1740         # Check the parameter name against the function name for single parameter set functions.
   1741         if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function):
   1742             trimmed_function_name = function_state.current_function[len('set'):]
   1743             if not _check_parameter_name_against_text(parameter, trimmed_function_name, error):
   1744                 continue  # Since an error was noted for this name, move to the next parameter.
   1745 
   1746         # Check the parameter name against the type.
   1747         if not _check_parameter_name_against_text(parameter, parameter.type, error):
   1748             continue  # Since an error was noted for this name, move to the next parameter.
   1749 
   1750 
   1751 def check_pass_ptr_usage(clean_lines, line_number, function_state, error):
   1752     """Check for proper usage of Pass*Ptr.
   1753 
   1754     Currently this is limited to detecting declarations of Pass*Ptr
   1755     variables inside of functions.
   1756 
   1757     Args:
   1758       clean_lines: A CleansedLines instance containing the file.
   1759       line_number: The number of the line to check.
   1760       function_state: Current function name and lines in body so far.
   1761       error: The function to call with any errors found.
   1762     """
   1763     if not function_state.in_a_function:
   1764         return
   1765 
   1766     lines = clean_lines.lines
   1767     line = lines[line_number]
   1768     if line_number > function_state.body_start_position.row:
   1769         matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line)
   1770         if matched_pass_ptr:
   1771             type_name = 'Pass%sPtr' % matched_pass_ptr.group(1)
   1772             error(line_number, 'readability/pass_ptr', 5,
   1773                   'Local variables should never be %s (see '
   1774                   'http://webkit.org/coding/RefPtr.html).' % type_name)
   1775 
   1776 
   1777 def check_for_leaky_patterns(clean_lines, line_number, function_state, error):
   1778     """Check for constructs known to be leak prone.
   1779     Args:
   1780       clean_lines: A CleansedLines instance containing the file.
   1781       line_number: The number of the line to check.
   1782       function_state: Current function name and lines in body so far.
   1783       error: The function to call with any errors found.
   1784     """
   1785     lines = clean_lines.lines
   1786     line = lines[line_number]
   1787 
   1788     matched_get_dc = search(r'\b(?P<function_name>GetDC(Ex)?)\s*\(', line)
   1789     if matched_get_dc:
   1790         error(line_number, 'runtime/leaky_pattern', 5,
   1791               'Use the class HWndDC instead of calling %s to avoid potential '
   1792               'memory leaks.' % matched_get_dc.group('function_name'))
   1793 
   1794     matched_create_dc = search(r'\b(?P<function_name>Create(Compatible)?DC)\s*\(', line)
   1795     matched_own_dc = search(r'\badoptPtr\b', line)
   1796     if matched_create_dc and not matched_own_dc:
   1797         error(line_number, 'runtime/leaky_pattern', 5,
   1798               'Use adoptPtr and OwnPtr<HDC> when calling %s to avoid potential '
   1799               'memory leaks.' % matched_create_dc.group('function_name'))
   1800 
   1801 
   1802 def check_spacing(file_extension, clean_lines, line_number, error):
   1803     """Checks for the correctness of various spacing issues in the code.
   1804 
   1805     Things we check for: spaces around operators, spaces after
   1806     if/for/while/switch, no spaces around parens in function calls, two
   1807     spaces between code and comment, don't start a block with a blank
   1808     line, don't end a function with a blank line, don't have too many
   1809     blank lines in a row.
   1810 
   1811     Args:
   1812       file_extension: The current file extension, without the leading dot.
   1813       clean_lines: A CleansedLines instance containing the file.
   1814       line_number: The number of the line to check.
   1815       error: The function to call with any errors found.
   1816     """
   1817 
   1818     raw = clean_lines.raw_lines
   1819     line = raw[line_number]
   1820 
   1821     # Before nixing comments, check if the line is blank for no good
   1822     # reason.  This includes the first line after a block is opened, and
   1823     # blank lines at the end of a function (ie, right before a line like '}').
   1824     if is_blank_line(line):
   1825         elided = clean_lines.elided
   1826         previous_line = elided[line_number - 1]
   1827         previous_brace = previous_line.rfind('{')
   1828         # FIXME: Don't complain if line before blank line, and line after,
   1829         #        both start with alnums and are indented the same amount.
   1830         #        This ignores whitespace at the start of a namespace block
   1831         #        because those are not usually indented.
   1832         if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
   1833             and previous_line[:previous_brace].find('namespace') == -1):
   1834             # OK, we have a blank line at the start of a code block.  Before we
   1835             # complain, we check if it is an exception to the rule: The previous
   1836             # non-empty line has the parameters of a function header that are indented
   1837             # 4 spaces (because they did not fit in a 80 column line when placed on
   1838             # the same line as the function name).  We also check for the case where
   1839             # the previous line is indented 6 spaces, which may happen when the
   1840             # initializers of a constructor do not fit into a 80 column line.
   1841             exception = False
   1842             if match(r' {6}\w', previous_line):  # Initializer list?
   1843                 # We are looking for the opening column of initializer list, which
   1844                 # should be indented 4 spaces to cause 6 space indentation afterwards.
   1845                 search_position = line_number - 2
   1846                 while (search_position >= 0
   1847                        and match(r' {6}\w', elided[search_position])):
   1848                     search_position -= 1
   1849                 exception = (search_position >= 0
   1850                              and elided[search_position][:5] == '    :')
   1851             else:
   1852                 # Search for the function arguments or an initializer list.  We use a
   1853                 # simple heuristic here: If the line is indented 4 spaces; and we have a
   1854                 # closing paren, without the opening paren, followed by an opening brace
   1855                 # or colon (for initializer lists) we assume that it is the last line of
   1856                 # a function header.  If we have a colon indented 4 spaces, it is an
   1857                 # initializer list.
   1858                 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   1859                                    previous_line)
   1860                              or match(r' {4}:', previous_line))
   1861 
   1862             if not exception:
   1863                 error(line_number, 'whitespace/blank_line', 2,
   1864                       'Blank line at the start of a code block.  Is this needed?')
   1865         # This doesn't ignore whitespace at the end of a namespace block
   1866         # because that is too hard without pairing open/close braces;
   1867         # however, a special exception is made for namespace closing
   1868         # brackets which have a comment containing "namespace".
   1869         #
   1870         # Also, ignore blank lines at the end of a block in a long if-else
   1871         # chain, like this:
   1872         #   if (condition1) {
   1873         #     // Something followed by a blank line
   1874         #
   1875         #   } else if (condition2) {
   1876         #     // Something else
   1877         #   }
   1878         if line_number + 1 < clean_lines.num_lines():
   1879             next_line = raw[line_number + 1]
   1880             if (next_line
   1881                 and match(r'\s*}', next_line)
   1882                 and next_line.find('namespace') == -1
   1883                 and next_line.find('} else ') == -1):
   1884                 error(line_number, 'whitespace/blank_line', 3,
   1885                       'Blank line at the end of a code block.  Is this needed?')
   1886 
   1887     # Next, we check for proper spacing with respect to comments.
   1888     comment_position = line.find('//')
   1889     if comment_position != -1:
   1890         # Check if the // may be in quotes.  If so, ignore it
   1891         # Comparisons made explicit for clarity
   1892         if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
   1893             # Allow one space before end of line comment.
   1894             if (not match(r'^\s*$', line[:comment_position])
   1895                 and (comment_position >= 1
   1896                 and ((line[comment_position - 1] not in string.whitespace)
   1897                      or (comment_position >= 2
   1898                          and line[comment_position - 2] in string.whitespace)))):
   1899                 error(line_number, 'whitespace/comments', 5,
   1900                       'One space before end of line comments')
   1901             # There should always be a space between the // and the comment
   1902             commentend = comment_position + 2
   1903             if commentend < len(line) and not line[commentend] == ' ':
   1904                 # but some lines are exceptions -- e.g. if they're big
   1905                 # comment delimiters like:
   1906                 # //----------------------------------------------------------
   1907                 # or they begin with multiple slashes followed by a space:
   1908                 # //////// Header comment
   1909                 matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
   1910                            or search(r'^/+ ', line[commentend:]))
   1911                 if not matched:
   1912                     error(line_number, 'whitespace/comments', 4,
   1913                           'Should have a space between // and comment')
   1914 
   1915             # There should only be one space after punctuation in a comment.
   1916             if search(r'[.!?,;:]\s\s+\w', line[comment_position:]):
   1917                 error(line_number, 'whitespace/comments', 5,
   1918                       'Should have only a single space after a punctuation in a comment.')
   1919 
   1920     line = clean_lines.elided[line_number]  # get rid of comments and strings
   1921 
   1922     # Don't try to do spacing checks for operator methods
   1923     line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=|/)\(', 'operator\(', line)
   1924     # Don't try to do spacing checks for #include or #import statements at
   1925     # minimum because it messes up checks for spacing around /
   1926     if match(r'\s*#\s*(?:include|import)', line):
   1927         return
   1928     if search(r'[\w.]=[\w.]', line):
   1929         error(line_number, 'whitespace/operators', 4,
   1930               'Missing spaces around =')
   1931 
   1932     # FIXME: It's not ok to have spaces around binary operators like .
   1933 
   1934     # You should always have whitespace around binary operators.
   1935     # Alas, we can't test < or > because they're legitimately used sans spaces
   1936     # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
   1937     # only if it's not template params list spilling into the next line.
   1938     matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
   1939     if not matched:
   1940         # Note that while it seems that the '<[^<]*' term in the following
   1941         # regexp could be simplified to '<.*', which would indeed match
   1942         # the same class of strings, the [^<] means that searching for the
   1943         # regexp takes linear rather than quadratic time.
   1944         if not search(r'<[^<]*,\s*$', line):  # template params spill
   1945             matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
   1946     if matched:
   1947         error(line_number, 'whitespace/operators', 3,
   1948               'Missing spaces around %s' % matched.group(1))
   1949 
   1950     # There shouldn't be space around unary operators
   1951     matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   1952     if matched:
   1953         error(line_number, 'whitespace/operators', 4,
   1954               'Extra space for operator %s' % matched.group(1))
   1955 
   1956     # A pet peeve of mine: no spaces after an if, while, switch, or for
   1957     matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
   1958     if matched:
   1959         error(line_number, 'whitespace/parens', 5,
   1960               'Missing space before ( in %s' % matched.group(1))
   1961 
   1962     # For if/for/foreach/while/switch, the left and right parens should be
   1963     # consistent about how many spaces are inside the parens, and
   1964     # there should either be zero or one spaces inside the parens.
   1965     # We don't want: "if ( foo)" or "if ( foo   )".
   1966     # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   1967     matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
   1968     if matched:
   1969         statement = matched.group('statement')
   1970         condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
   1971         if condition is not None:
   1972             condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
   1973             if condition_match:
   1974                 n_leading = len(condition_match.group('leading'))
   1975                 n_trailing = len(condition_match.group('trailing'))
   1976                 if n_leading != 0:
   1977                     for_exception = statement == 'for' and condition.startswith(' ;')
   1978                     if not for_exception:
   1979                         error(line_number, 'whitespace/parens', 5,
   1980                               'Extra space after ( in %s' % statement)
   1981                 if n_trailing != 0:
   1982                     for_exception = statement == 'for' and condition.endswith('; ')
   1983                     if not for_exception:
   1984                         error(line_number, 'whitespace/parens', 5,
   1985                               'Extra space before ) in %s' % statement)
   1986 
   1987             # Do not check for more than one command in macros
   1988             in_preprocessor_directive = match(r'\s*#', line)
   1989             if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
   1990                 error(line_number, 'whitespace/parens', 4,
   1991                       'More than one command on the same line in %s' % statement)
   1992 
   1993     # You should always have a space after a comma (either as fn arg or operator)
   1994     if search(r',[^\s]', line):
   1995         error(line_number, 'whitespace/comma', 3,
   1996               'Missing space after ,')
   1997 
   1998     matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
   1999     if matched:
   2000         error(line_number, 'whitespace/declaration', 3,
   2001               'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))
   2002 
   2003     if file_extension == 'cpp':
   2004         # C++ should have the & or * beside the type not the variable name.
   2005         matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
   2006         if matched:
   2007             error(line_number, 'whitespace/declaration', 3,
   2008                   'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
   2009 
   2010     elif file_extension == 'c':
   2011         # C Pointer declaration should have the * beside the variable not the type name.
   2012         matched = search(r'^\s*\w+\*\s+\w+', line)
   2013         if matched:
   2014             error(line_number, 'whitespace/declaration', 3,
   2015                   'Declaration has space between * and variable name in %s' % matched.group(0).strip())
   2016 
   2017     # Next we will look for issues with function calls.
   2018     check_spacing_for_function_call(line, line_number, error)
   2019 
   2020     # Except after an opening paren, you should have spaces before your braces.
   2021     # And since you should never have braces at the beginning of a line, this is
   2022     # an easy test.
   2023     if search(r'[^ ({]{', line):
   2024         error(line_number, 'whitespace/braces', 5,
   2025               'Missing space before {')
   2026 
   2027     # Make sure '} else {' has spaces.
   2028     if search(r'}else', line):
   2029         error(line_number, 'whitespace/braces', 5,
   2030               'Missing space before else')
   2031 
   2032     # You shouldn't have spaces before your brackets, except maybe after
   2033     # 'delete []' or 'new char * []'.
   2034     if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
   2035         error(line_number, 'whitespace/braces', 5,
   2036               'Extra space before [')
   2037 
   2038     # There should always be a single space in between braces on the same line.
   2039     if search(r'\{\}', line):
   2040         error(line_number, 'whitespace/braces', 5, 'Missing space inside { }.')
   2041     if search(r'\{\s\s+\}', line):
   2042         error(line_number, 'whitespace/braces', 5, 'Too many spaces inside { }.')
   2043 
   2044     # You shouldn't have a space before a semicolon at the end of the line.
   2045     # There's a special case for "for" since the style guide allows space before
   2046     # the semicolon there.
   2047     if search(r':\s*;\s*$', line):
   2048         error(line_number, 'whitespace/semicolon', 5,
   2049               'Semicolon defining empty statement. Use { } instead.')
   2050     elif search(r'^\s*;\s*$', line):
   2051         error(line_number, 'whitespace/semicolon', 5,
   2052               'Line contains only semicolon. If this should be an empty statement, '
   2053               'use { } instead.')
   2054     elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
   2055         error(line_number, 'whitespace/semicolon', 5,
   2056               'Extra space before last semicolon. If this should be an empty '
   2057               'statement, use { } instead.')
   2058     elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
   2059           and line.count('(') == line.count(')')
   2060           # Allow do {} while();
   2061           and not search(r'}\s*while', line)):
   2062         error(line_number, 'whitespace/semicolon', 5,
   2063               'Semicolon defining empty statement for this loop. Use { } instead.')
   2064 
   2065 
   2066 def get_previous_non_blank_line(clean_lines, line_number):
   2067     """Return the most recent non-blank line and its line number.
   2068 
   2069     Args:
   2070       clean_lines: A CleansedLines instance containing the file contents.
   2071       line_number: The number of the line to check.
   2072 
   2073     Returns:
   2074       A tuple with two elements.  The first element is the contents of the last
   2075       non-blank line before the current line, or the empty string if this is the
   2076       first non-blank line.  The second is the line number of that line, or -1
   2077       if this is the first non-blank line.
   2078     """
   2079 
   2080     previous_line_number = line_number - 1
   2081     while previous_line_number >= 0:
   2082         previous_line = clean_lines.elided[previous_line_number]
   2083         if not is_blank_line(previous_line):     # if not a blank line...
   2084             return (previous_line, previous_line_number)
   2085         previous_line_number -= 1
   2086     return ('', -1)
   2087 
   2088 
   2089 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
   2090     """Looks for indentation errors inside of namespaces.
   2091 
   2092     Args:
   2093       clean_lines: A CleansedLines instance containing the file.
   2094       line_number: The number of the line to check.
   2095       file_extension: The extension (dot not included) of the file.
   2096       file_state: A _FileState instance which maintains information about
   2097                   the state of things in the file.
   2098       error: The function to call with any errors found.
   2099     """
   2100 
   2101     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2102 
   2103     namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
   2104     if not namespace_match:
   2105         return
   2106 
   2107     current_indentation_level = len(namespace_match.group('namespace_indentation'))
   2108     if current_indentation_level > 0:
   2109         # Don't warn about an indented namespace if we already warned about indented code.
   2110         if not file_state.did_inside_namespace_indent_warning():
   2111             error(line_number, 'whitespace/indent', 4,
   2112                   'namespace should never be indented.')
   2113         return
   2114     looking_for_semicolon = False;
   2115     line_offset = 0
   2116     in_preprocessor_directive = False;
   2117     for current_line in clean_lines.elided[line_number + 1:]:
   2118         line_offset += 1
   2119         if not current_line.strip():
   2120             continue
   2121         if not current_indentation_level:
   2122             if not (in_preprocessor_directive or looking_for_semicolon):
   2123                 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
   2124                     file_state.set_did_inside_namespace_indent_warning()
   2125                     error(line_number + line_offset, 'whitespace/indent', 4,
   2126                           'Code inside a namespace should not be indented.')
   2127             if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax.
   2128                 in_preprocessor_directive = current_line[-1] == '\\'
   2129             else:
   2130                 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\')
   2131         else:
   2132             looking_for_semicolon = False; # If we have a brace we may not need a semicolon.
   2133         current_indentation_level += current_line.count('{') - current_line.count('}')
   2134         if current_indentation_level < 0:
   2135             break;
   2136 
   2137 
   2138 def check_enum_casing(clean_lines, line_number, enum_state, error):
   2139     """Looks for incorrectly named enum values.
   2140 
   2141     Args:
   2142       clean_lines: A CleansedLines instance containing the file.
   2143       line_number: The number of the line to check.
   2144       enum_state: A _EnumState instance which maintains enum declaration state.
   2145       error: The function to call with any errors found.
   2146     """
   2147 
   2148     enum_state.is_webidl_enum |= bool(match(r'\s*// Web(?:Kit)?IDL enum\s*$', clean_lines.raw_lines[line_number]))
   2149 
   2150     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2151     if not enum_state.process_clean_line(line):
   2152         error(line_number, 'readability/enum_casing', 4,
   2153               'enum members should use InterCaps with an initial capital letter.')
   2154 
   2155 def check_directive_indentation(clean_lines, line_number, file_state, error):
   2156     """Looks for indentation of preprocessor directives.
   2157 
   2158     Args:
   2159       clean_lines: A CleansedLines instance containing the file.
   2160       line_number: The number of the line to check.
   2161       file_state: A _FileState instance which maintains information about
   2162                   the state of things in the file.
   2163       error: The function to call with any errors found.
   2164     """
   2165 
   2166     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2167 
   2168     indented_preprocessor_directives = match(r'\s+#', line)
   2169     if not indented_preprocessor_directives:
   2170         return
   2171 
   2172     error(line_number, 'whitespace/indent', 4, 'preprocessor directives (e.g., #ifdef, #define, #import) should never be indented.')
   2173 
   2174 
   2175 def get_initial_spaces_for_line(clean_line):
   2176     initial_spaces = 0
   2177     while initial_spaces < len(clean_line) and clean_line[initial_spaces] == ' ':
   2178         initial_spaces += 1
   2179     return initial_spaces
   2180 
   2181 
   2182 def check_indentation_amount(clean_lines, line_number, error):
   2183     line = clean_lines.elided[line_number]
   2184     initial_spaces = get_initial_spaces_for_line(line)
   2185 
   2186     if initial_spaces % 4:
   2187         error(line_number, 'whitespace/indent', 3,
   2188               'Weird number of spaces at line-start.  Are you using a 4-space indent?')
   2189         return
   2190 
   2191     previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2192     if not previous_line.strip() or match(r'\s*\w+\s*:\s*$', previous_line) or previous_line[0] == '#':
   2193         return
   2194 
   2195     previous_line_initial_spaces = get_initial_spaces_for_line(previous_line)
   2196     if initial_spaces > previous_line_initial_spaces + 4:
   2197         error(line_number, 'whitespace/indent', 3, 'When wrapping a line, only indent 4 spaces.')
   2198 
   2199 
   2200 def check_using_std(clean_lines, line_number, file_state, error):
   2201     """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'.
   2202 
   2203     Args:
   2204       clean_lines: A CleansedLines instance containing the file.
   2205       line_number: The number of the line to check.
   2206       file_state: A _FileState instance which maintains information about
   2207                   the state of things in the file.
   2208       error: The function to call with any errors found.
   2209     """
   2210 
   2211     # This check doesn't apply to C or Objective-C implementation files.
   2212     if file_state.is_c_or_objective_c():
   2213         return
   2214 
   2215     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2216 
   2217     using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line)
   2218     if not using_std_match:
   2219         return
   2220 
   2221     method_name = using_std_match.group('method_name')
   2222     error(line_number, 'build/using_std', 4,
   2223           "Use 'using namespace std;' instead of 'using std::%s;'." % method_name)
   2224 
   2225 
   2226 def check_max_min_macros(clean_lines, line_number, file_state, error):
   2227     """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min().
   2228 
   2229     Args:
   2230       clean_lines: A CleansedLines instance containing the file.
   2231       line_number: The number of the line to check.
   2232       file_state: A _FileState instance which maintains information about
   2233                   the state of things in the file.
   2234       error: The function to call with any errors found.
   2235     """
   2236 
   2237     # This check doesn't apply to C or Objective-C implementation files.
   2238     if file_state.is_c_or_objective_c():
   2239         return
   2240 
   2241     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2242 
   2243     max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line)
   2244     if not max_min_macros_search:
   2245         return
   2246 
   2247     max_min_macro = max_min_macros_search.group('max_min_macro')
   2248     max_min_macro_lower = max_min_macro.lower()
   2249     error(line_number, 'runtime/max_min_macros', 4,
   2250           'Use std::%s() or std::%s<type>() instead of the %s() macro.'
   2251           % (max_min_macro_lower, max_min_macro_lower, max_min_macro))
   2252 
   2253 
   2254 def check_ctype_functions(clean_lines, line_number, file_state, error):
   2255     """Looks for use of the standard functions in ctype.h and suggest they be replaced
   2256        by use of equivilent ones in <wtf/ASCIICType.h>?.
   2257 
   2258     Args:
   2259       clean_lines: A CleansedLines instance containing the file.
   2260       line_number: The number of the line to check.
   2261       file_state: A _FileState instance which maintains information about
   2262                   the state of things in the file.
   2263       error: The function to call with any errors found.
   2264     """
   2265 
   2266     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2267 
   2268     ctype_function_search = search(r'\b(?P<ctype_function>(isalnum|isalpha|isascii|isblank|iscntrl|isdigit|isgraph|islower|isprint|ispunct|isspace|isupper|isxdigit|toascii|tolower|toupper))\s*\(', line)
   2269     if not ctype_function_search:
   2270         return
   2271 
   2272     ctype_function = ctype_function_search.group('ctype_function')
   2273     error(line_number, 'runtime/ctype_function', 4,
   2274           'Use equivelent function in <wtf/ASCIICType.h> instead of the %s() function.'
   2275           % (ctype_function))
   2276 
   2277 def check_switch_indentation(clean_lines, line_number, error):
   2278     """Looks for indentation errors inside of switch statements.
   2279 
   2280     Args:
   2281       clean_lines: A CleansedLines instance containing the file.
   2282       line_number: The number of the line to check.
   2283       error: The function to call with any errors found.
   2284     """
   2285 
   2286     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2287 
   2288     switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
   2289     if not switch_match:
   2290         return
   2291 
   2292     switch_indentation = switch_match.group('switch_indentation')
   2293     inner_indentation = switch_indentation + ' ' * 4
   2294     line_offset = 0
   2295     encountered_nested_switch = False
   2296 
   2297     for current_line in clean_lines.elided[line_number + 1:]:
   2298         line_offset += 1
   2299 
   2300         # Skip not only empty lines but also those with preprocessor directives.
   2301         if current_line.strip() == '' or current_line.startswith('#'):
   2302             continue
   2303 
   2304         if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line):
   2305             # Complexity alarm - another switch statement nested inside the one
   2306             # that we're currently testing. We'll need to track the extent of
   2307             # that inner switch if the upcoming label tests are still supposed
   2308             # to work correctly. Let's not do that; instead, we'll finish
   2309             # checking this line, and then leave it like that. Assuming the
   2310             # indentation is done consistently (even if incorrectly), this will
   2311             # still catch all indentation issues in practice.
   2312             encountered_nested_switch = True
   2313 
   2314         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2315         current_indentation = current_indentation_match.group('indentation')
   2316         remaining_line = current_indentation_match.group('remaining_line')
   2317 
   2318         # End the check at the end of the switch statement.
   2319         if remaining_line.startswith('}') and current_indentation == switch_indentation:
   2320             break
   2321         # Case and default branches should not be indented. The regexp also
   2322         # catches single-line cases like "default: break;" but does not trigger
   2323         # on stuff like "Document::Foo();".
   2324         elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
   2325             if current_indentation != switch_indentation:
   2326                 error(line_number + line_offset, 'whitespace/indent', 4,
   2327                       'A case label should not be indented, but line up with its switch statement.')
   2328                 # Don't throw an error for multiple badly indented labels,
   2329                 # one should be enough to figure out the problem.
   2330                 break
   2331         # We ignore goto labels at the very beginning of a line.
   2332         elif match(r'\w+\s*:\s*$', remaining_line):
   2333             continue
   2334         # It's not a goto label, so check if it's indented at least as far as
   2335         # the switch statement plus one more level of indentation.
   2336         elif not current_indentation.startswith(inner_indentation):
   2337             error(line_number + line_offset, 'whitespace/indent', 4,
   2338                   'Non-label code inside switch statements should be indented.')
   2339             # Don't throw an error for multiple badly indented statements,
   2340             # one should be enough to figure out the problem.
   2341             break
   2342 
   2343         if encountered_nested_switch:
   2344             break
   2345 
   2346 
   2347 def check_braces(clean_lines, line_number, error):
   2348     """Looks for misplaced braces (e.g. at the end of line).
   2349 
   2350     Args:
   2351       clean_lines: A CleansedLines instance containing the file.
   2352       line_number: The number of the line to check.
   2353       error: The function to call with any errors found.
   2354     """
   2355 
   2356     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2357 
   2358     if match(r'\s*{\s*$', line):
   2359         # We allow an open brace to start a line in the case where someone
   2360         # is using braces for function definition or in a block to
   2361         # explicitly create a new scope, which is commonly used to control
   2362         # the lifetime of stack-allocated variables.  We don't detect this
   2363         # perfectly: we just don't complain if the last non-whitespace
   2364         # character on the previous non-blank line is ';', ':', '{', '}',
   2365         # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
   2366         # We also allow '#' for #endif and '=' for array initialization.
   2367         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2368         if ((not search(r'[;:}{)=]\s*$|\)\s*((const|OVERRIDE)\s*)*\s*$', previous_line)
   2369              or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
   2370             and previous_line.find('#') < 0):
   2371             error(line_number, 'whitespace/braces', 4,
   2372                   'This { should be at the end of the previous line')
   2373     elif (search(r'\)\s*(((const|OVERRIDE)\s*)*\s*)?{\s*$', line)
   2374           and line.count('(') == line.count(')')
   2375           and not search(r'\b(if|for|foreach|while|switch)\b', line)
   2376           and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
   2377         error(line_number, 'whitespace/braces', 4,
   2378               'Place brace on its own line for function definitions.')
   2379 
   2380     # An else clause should be on the same line as the preceding closing brace.
   2381     if match(r'\s*else\s*', line):
   2382         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2383         if match(r'\s*}\s*$', previous_line):
   2384             error(line_number, 'whitespace/newline', 4,
   2385                   'An else should appear on the same line as the preceding }')
   2386 
   2387     # Likewise, an else should never have the else clause on the same line
   2388     if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
   2389         error(line_number, 'whitespace/newline', 4,
   2390               'Else clause should never be on same line as else (use 2 lines)')
   2391 
   2392     # In the same way, a do/while should never be on one line
   2393     if match(r'\s*do [^\s{]', line):
   2394         error(line_number, 'whitespace/newline', 4,
   2395               'do/while clauses should not be on a single line')
   2396 
   2397     # Braces shouldn't be followed by a ; unless they're defining a struct
   2398     # or initializing an array.
   2399     # We can't tell in general, but we can for some common cases.
   2400     previous_line_number = line_number
   2401     while True:
   2402         (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
   2403         if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
   2404             line = previous_line + line
   2405         else:
   2406             break
   2407     if (search(r'{.*}\s*;', line)
   2408         and line.count('{') == line.count('}')
   2409         and not search(r'struct|class|enum|\s*=\s*{', line)):
   2410         error(line_number, 'readability/braces', 4,
   2411               "You don't need a ; after a }")
   2412 
   2413 
   2414 def check_exit_statement_simplifications(clean_lines, line_number, error):
   2415     """Looks for else or else-if statements that should be written as an
   2416     if statement when the prior if concludes with a return, break, continue or
   2417     goto statement.
   2418 
   2419     Args:
   2420       clean_lines: A CleansedLines instance containing the file.
   2421       line_number: The number of the line to check.
   2422       error: The function to call with any errors found.
   2423     """
   2424 
   2425     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2426 
   2427     else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
   2428     if not else_match:
   2429         return
   2430 
   2431     else_indentation = else_match.group('else_indentation')
   2432     inner_indentation = else_indentation + ' ' * 4
   2433 
   2434     previous_lines = clean_lines.elided[:line_number]
   2435     previous_lines.reverse()
   2436     line_offset = 0
   2437     encountered_exit_statement = False
   2438 
   2439     for current_line in previous_lines:
   2440         line_offset -= 1
   2441 
   2442         # Skip not only empty lines but also those with preprocessor directives
   2443         # and goto labels.
   2444         if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line):
   2445             continue
   2446 
   2447         # Skip lines with closing braces on the original indentation level.
   2448         # Even though the styleguide says they should be on the same line as
   2449         # the "else if" statement, we also want to check for instances where
   2450         # the current code does not comply with the coding style. Thus, ignore
   2451         # these lines and proceed to the line before that.
   2452         if current_line == else_indentation + '}':
   2453             continue
   2454 
   2455         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2456         current_indentation = current_indentation_match.group('indentation')
   2457         remaining_line = current_indentation_match.group('remaining_line')
   2458 
   2459         # As we're going up the lines, the first real statement to encounter
   2460         # has to be an exit statement (return, break, continue or goto) -
   2461         # otherwise, this check doesn't apply.
   2462         if not encountered_exit_statement:
   2463             # We only want to find exit statements if they are on exactly
   2464             # the same level of indentation as expected from the code inside
   2465             # the block. If the indentation doesn't strictly match then we
   2466             # might have a nested if or something, which must be ignored.
   2467             if current_indentation != inner_indentation:
   2468                 break
   2469             if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line):
   2470                 encountered_exit_statement = True
   2471                 continue
   2472             break
   2473 
   2474         # When code execution reaches this point, we've found an exit statement
   2475         # as last statement of the previous block. Now we only need to make
   2476         # sure that the block belongs to an "if", then we can throw an error.
   2477 
   2478         # Skip lines with opening braces on the original indentation level,
   2479         # similar to the closing braces check above. ("if (condition)\n{")
   2480         if current_line == else_indentation + '{':
   2481             continue
   2482 
   2483         # Skip everything that's further indented than our "else" or "else if".
   2484         if current_indentation.startswith(else_indentation) and current_indentation != else_indentation:
   2485             continue
   2486 
   2487         # So we've got a line with same (or less) indentation. Is it an "if"?
   2488         # If yes: throw an error. If no: don't throw an error.
   2489         # Whatever the outcome, this is the end of our loop.
   2490         if match(r'if\s*\(', remaining_line):
   2491             if else_match.start('else') != -1:
   2492                 error(line_number + line_offset, 'readability/control_flow', 4,
   2493                       'An else statement can be removed when the prior "if" '
   2494                       'concludes with a return, break, continue or goto statement.')
   2495             else:
   2496                 error(line_number + line_offset, 'readability/control_flow', 4,
   2497                       'An else if statement should be written as an if statement '
   2498                       'when the prior "if" concludes with a return, break, '
   2499                       'continue or goto statement.')
   2500         break
   2501 
   2502 
   2503 def replaceable_check(operator, macro, line):
   2504     """Determine whether a basic CHECK can be replaced with a more specific one.
   2505 
   2506     For example suggest using CHECK_EQ instead of CHECK(a == b) and
   2507     similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   2508 
   2509     Args:
   2510       operator: The C++ operator used in the CHECK.
   2511       macro: The CHECK or EXPECT macro being called.
   2512       line: The current source line.
   2513 
   2514     Returns:
   2515       True if the CHECK can be replaced with a more specific one.
   2516     """
   2517 
   2518     # This matches decimal and hex integers, strings, and chars (in that order).
   2519     match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   2520 
   2521     # Expression to match two sides of the operator with something that
   2522     # looks like a literal, since CHECK(x == iterator) won't compile.
   2523     # This means we can't catch all the cases where a more specific
   2524     # CHECK is possible, but it's less annoying than dealing with
   2525     # extraneous warnings.
   2526     match_this = (r'\s*' + macro + r'\((\s*' +
   2527                   match_constant + r'\s*' + operator + r'[^<>].*|'
   2528                   r'.*[^<>]' + operator + r'\s*' + match_constant +
   2529                   r'\s*\))')
   2530 
   2531     # Don't complain about CHECK(x == NULL) or similar because
   2532     # CHECK_EQ(x, NULL) won't compile (requires a cast).
   2533     # Also, don't complain about more complex boolean expressions
   2534     # involving && or || such as CHECK(a == b || c == d).
   2535     return match(match_this, line) and not search(r'NULL|&&|\|\|', line)
   2536 
   2537 
   2538 def check_check(clean_lines, line_number, error):
   2539     """Checks the use of CHECK and EXPECT macros.
   2540 
   2541     Args:
   2542       clean_lines: A CleansedLines instance containing the file.
   2543       line_number: The number of the line to check.
   2544       error: The function to call with any errors found.
   2545     """
   2546 
   2547     # Decide the set of replacement macros that should be suggested
   2548     raw_lines = clean_lines.raw_lines
   2549     current_macro = ''
   2550     for macro in _CHECK_MACROS:
   2551         if raw_lines[line_number].find(macro) >= 0:
   2552             current_macro = macro
   2553             break
   2554     if not current_macro:
   2555         # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   2556         return
   2557 
   2558     line = clean_lines.elided[line_number]        # get rid of comments and strings
   2559 
   2560     # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   2561     for operator in ['==', '!=', '>=', '>', '<=', '<']:
   2562         if replaceable_check(operator, current_macro, line):
   2563             error(line_number, 'readability/check', 2,
   2564                   'Consider using %s instead of %s(a %s b)' % (
   2565                       _CHECK_REPLACEMENT[current_macro][operator],
   2566                       current_macro, operator))
   2567             break
   2568 
   2569 
   2570 def check_for_comparisons_to_zero(clean_lines, line_number, error):
   2571     # Get the line without comments and strings.
   2572     line = clean_lines.elided[line_number]
   2573 
   2574     # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
   2575     if search(r'[=!]=\s*(NULL|0|true|false)[^\w.]', line) or search(r'[^\w.](NULL|0|true|false)\s*[=!]=', line):
   2576         if not search('LIKELY', line) and not search('UNLIKELY', line):
   2577             error(line_number, 'readability/comparison_to_zero', 5,
   2578                   'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
   2579 
   2580 
   2581 def check_for_null(clean_lines, line_number, file_state, error):
   2582     # This check doesn't apply to C or Objective-C implementation files.
   2583     if file_state.is_c_or_objective_c():
   2584         return
   2585 
   2586     line = clean_lines.elided[line_number]
   2587 
   2588     # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372.
   2589     if search(r'\bg(_[a-z]+)+\b', line):
   2590         return
   2591 
   2592     # Don't warn about NULL usage in gst_*(). See Bug 70498.
   2593     if search(r'\bgst(_[a-z]+)+\b', line):
   2594         return
   2595 
   2596     # Don't warn about NULL usage in gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090.
   2597     if search(r'\bgdk_pixbuf_save_to\w+\b', line):
   2598         return
   2599 
   2600     # Don't warn about NULL usage in gtk_widget_style_get(), gtk_style_context_get_style(), or gtk_style_context_get(). See Bug 51758
   2601     if search(r'\bgtk_widget_style_get\(\w+\b', line) or search(r'\bgtk_style_context_get_style\(\w+\b', line) or search(r'\bgtk_style_context_get\(\w+\b', line):
   2602         return
   2603 
   2604     # Don't warn about NULL usage in soup_server_new(). See Bug 77890.
   2605     if search(r'\bsoup_server_new\(\w+\b', line):
   2606         return
   2607 
   2608     if search(r'\bNULL\b', line):
   2609         error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
   2610         return
   2611 
   2612     line = clean_lines.raw_lines[line_number]
   2613     # See if NULL occurs in any comments in the line. If the search for NULL using the raw line
   2614     # matches, then do the check with strings collapsed to avoid giving errors for
   2615     # NULLs occurring in strings.
   2616     if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)):
   2617         error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).')
   2618 
   2619 def get_line_width(line):
   2620     """Determines the width of the line in column positions.
   2621 
   2622     Args:
   2623       line: A string, which may be a Unicode string.
   2624 
   2625     Returns:
   2626       The width of the line in column positions, accounting for Unicode
   2627       combining characters and wide characters.
   2628     """
   2629     if isinstance(line, unicode):
   2630         width = 0
   2631         for c in unicodedata.normalize('NFC', line):
   2632             if unicodedata.east_asian_width(c) in ('W', 'F'):
   2633                 width += 2
   2634             elif not unicodedata.combining(c):
   2635                 width += 1
   2636         return width
   2637     return len(line)
   2638 
   2639 
   2640 def check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line_number, error):
   2641     """Scans the bodies of conditionals and loops, and in particular
   2642     all the arms of conditionals, for violations in the use of braces.
   2643 
   2644     Specifically:
   2645 
   2646     (1) If an arm omits braces, then the following statement must be on one
   2647     physical line.
   2648     (2) If any arm uses braces, all arms must use them.
   2649 
   2650     These checks are only done here if we find the start of an
   2651     'if/for/foreach/while' statement, because this function fails fast
   2652     if it encounters constructs it doesn't understand. Checks
   2653     elsewhere validate other constraints, such as requiring '}' and
   2654     'else' to be on the same line.
   2655 
   2656     Args:
   2657       clean_lines: A CleansedLines instance containing the file.
   2658       line_number: The number of the line to check.
   2659       error: The function to call with any errors found.
   2660     """
   2661 
   2662     # We work with the elided lines. Comments have been removed, but line
   2663     # numbers are preserved, so we can still find situations where
   2664     # single-expression control clauses span multiple lines, or when a
   2665     # comment preceded the expression.
   2666     lines = clean_lines.elided
   2667     line = lines[line_number]
   2668 
   2669     # Match control structures.
   2670     control_match = match(r'\s*(if|foreach|for|while)\s*\(', line)
   2671     if not control_match:
   2672         return
   2673 
   2674     # Found the start of a conditional or loop.
   2675 
   2676     # The following loop handles all potential arms of the control clause.
   2677     # The initial conditions are the following:
   2678     #   - We start on the opening paren '(' of the condition, *unless* we are
   2679     #     handling an 'else' block, in which case there is no condition.
   2680     #   - In the latter case, we start at the position just beyond the 'else'
   2681     #     token.
   2682     expect_conditional_expression = True
   2683     know_whether_using_braces = False
   2684     using_braces = False
   2685     search_for_else_clause = control_match.group(1) == "if"
   2686     current_pos = Position(line_number, control_match.end() - 1)
   2687 
   2688     while True:
   2689         if expect_conditional_expression:
   2690             # Try to find the end of the conditional expression,
   2691             # potentially spanning multiple lines.
   2692             open_paren_pos = current_pos
   2693             close_paren_pos = close_expression(lines, open_paren_pos)
   2694             if close_paren_pos.column < 0:
   2695                 return
   2696             current_pos = close_paren_pos
   2697 
   2698         end_line_of_conditional = current_pos.row
   2699 
   2700         # Find the start of the body.
   2701         current_pos = _find_in_lines(r'\S', lines, current_pos, None)
   2702         if not current_pos:
   2703             return
   2704 
   2705         current_arm_uses_brace = False
   2706         if lines[current_pos.row][current_pos.column] == '{':
   2707             current_arm_uses_brace = True
   2708         if know_whether_using_braces:
   2709             if using_braces != current_arm_uses_brace:
   2710                 error(current_pos.row, 'whitespace/braces', 4,
   2711                       'If one part of an if-else statement uses curly braces, the other part must too.')
   2712                 return
   2713         know_whether_using_braces = True
   2714         using_braces = current_arm_uses_brace
   2715 
   2716         if using_braces:
   2717             # Skip over the entire arm.
   2718             current_pos = close_expression(lines, current_pos)
   2719             if current_pos.column < 0:
   2720                 return
   2721         else:
   2722             # Skip over the current expression.
   2723             current_line_number = current_pos.row
   2724             current_pos = _find_in_lines(r';', lines, current_pos, None)
   2725             if not current_pos:
   2726                 return
   2727             # If the end of the expression is beyond the line just after
   2728             # the close parenthesis or control clause, we've found a
   2729             # single-expression arm that spans multiple lines. (We don't
   2730             # fire this error for expressions ending on the same line; that
   2731             # is a different error, handled elsewhere.)
   2732             if current_pos.row > 1 + end_line_of_conditional:
   2733                 error(current_pos.row, 'whitespace/braces', 4,
   2734                       'A conditional or loop body must use braces if the statement is more than one line long.')
   2735                 return
   2736             current_pos = Position(current_pos.row, 1 + current_pos.column)
   2737 
   2738         # At this point current_pos points just past the end of the last
   2739         # arm. If we just handled the last control clause, we're done.
   2740         if not search_for_else_clause:
   2741             return
   2742 
   2743         # Scan forward for the next non-whitespace character, and see
   2744         # whether we are continuing a conditional (with an 'else' or
   2745         # 'else if'), or are done.
   2746         current_pos = _find_in_lines(r'\S', lines, current_pos, None)
   2747         if not current_pos:
   2748             return
   2749         next_nonspace_string = lines[current_pos.row][current_pos.column:]
   2750         next_conditional = match(r'(else\s*if|else)', next_nonspace_string)
   2751         if not next_conditional:
   2752             # Done processing this 'if' and all arms.
   2753             return
   2754         if next_conditional.group(1) == "else if":
   2755             current_pos = _find_in_lines(r'\(', lines, current_pos, None)
   2756         else:
   2757             current_pos.column += 4  # skip 'else'
   2758             expect_conditional_expression = False
   2759             search_for_else_clause = False
   2760     # End while loop
   2761 
   2762 def check_style(clean_lines, line_number, file_extension, class_state, file_state, enum_state, error):
   2763     """Checks rules from the 'C++ style rules' section of cppguide.html.
   2764 
   2765     Most of these rules are hard to test (naming, comment style), but we
   2766     do what we can.  In particular we check for 4-space indents, line lengths,
   2767     tab usage, spaces inside code, etc.
   2768 
   2769     Args:
   2770       clean_lines: A CleansedLines instance containing the file.
   2771       line_number: The number of the line to check.
   2772       file_extension: The extension (without the dot) of the filename.
   2773       class_state: A _ClassState instance which maintains information about
   2774                    the current stack of nested class declarations being parsed.
   2775       file_state: A _FileState instance which maintains information about
   2776                   the state of things in the file.
   2777       enum_state: A _EnumState instance which maintains the current enum state.
   2778       error: The function to call with any errors found.
   2779     """
   2780 
   2781     raw_lines = clean_lines.raw_lines
   2782     line = raw_lines[line_number]
   2783 
   2784     if line.find('\t') != -1:
   2785         error(line_number, 'whitespace/tab', 1,
   2786               'Tab found; better to use spaces')
   2787 
   2788     cleansed_line = clean_lines.elided[line_number]
   2789     if line and line[-1].isspace():
   2790         error(line_number, 'whitespace/end_of_line', 4,
   2791               'Line ends in whitespace.  Consider deleting these extra spaces.')
   2792 
   2793     if (cleansed_line.count(';') > 1
   2794         # for loops are allowed two ;'s (and may run over two lines).
   2795         and cleansed_line.find('for') == -1
   2796         and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1
   2797              or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1)
   2798         # It's ok to have many commands in a switch case that fits in 1 line
   2799         and not ((cleansed_line.find('case ') != -1
   2800                   or cleansed_line.find('default:') != -1)
   2801                  and cleansed_line.find('break;') != -1)
   2802         # Also it's ok to have many commands in trivial single-line accessors in class definitions.
   2803         and not (match(r'.*\(.*\).*{.*.}', line)
   2804                  and class_state.classinfo_stack
   2805                  and line.count('{') == line.count('}'))
   2806         and not cleansed_line.startswith('#define ')
   2807         # It's ok to use use WTF_MAKE_NONCOPYABLE and WTF_MAKE_FAST_ALLOCATED macros in 1 line
   2808         and not (cleansed_line.find("WTF_MAKE_NONCOPYABLE") != -1
   2809                  and cleansed_line.find("WTF_MAKE_FAST_ALLOCATED") != -1)):
   2810         error(line_number, 'whitespace/newline', 4,
   2811               'More than one command on the same line')
   2812 
   2813     if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'):
   2814         error(line_number, 'whitespace/operators', 4,
   2815               'Boolean expressions that span multiple lines should have their '
   2816               'operators on the left side of the line instead of the right side.')
   2817 
   2818     # Some more style checks
   2819     check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
   2820     check_directive_indentation(clean_lines, line_number, file_state, error)
   2821     check_using_std(clean_lines, line_number, file_state, error)
   2822     check_max_min_macros(clean_lines, line_number, file_state, error)
   2823     check_ctype_functions(clean_lines, line_number, file_state, error)
   2824     check_switch_indentation(clean_lines, line_number, error)
   2825     check_braces(clean_lines, line_number, error)
   2826     check_exit_statement_simplifications(clean_lines, line_number, error)
   2827     check_spacing(file_extension, clean_lines, line_number, error)
   2828     check_check(clean_lines, line_number, error)
   2829     check_for_comparisons_to_zero(clean_lines, line_number, error)
   2830     check_for_null(clean_lines, line_number, file_state, error)
   2831     check_indentation_amount(clean_lines, line_number, error)
   2832     check_enum_casing(clean_lines, line_number, enum_state, error)
   2833 
   2834 
# Matches '#include' followed by one or more spaces and a double-quoted
# name ending in '.h', with no '/' between the opening quote and the '.h'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the included path between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2843 
   2844 
   2845 def _drop_common_suffixes(filename):
   2846     """Drops common suffixes like _test.cpp or -inl.h from filename.
   2847 
   2848     For example:
   2849       >>> _drop_common_suffixes('foo/foo-inl.h')
   2850       'foo/foo'
   2851       >>> _drop_common_suffixes('foo/bar/foo.cpp')
   2852       'foo/bar/foo'
   2853       >>> _drop_common_suffixes('foo/foo_internal.h')
   2854       'foo/foo'
   2855       >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
   2856       'foo/foo_unusualinternal'
   2857 
   2858     Args:
   2859       filename: The input filename.
   2860 
   2861     Returns:
   2862       The filename with the common suffix removed.
   2863     """
   2864     for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
   2865                    'inl.h', 'impl.h', 'internal.h'):
   2866         if (filename.endswith(suffix) and len(filename) > len(suffix)
   2867             and filename[-len(suffix) - 1] in ('-', '_')):
   2868             return filename[:-len(suffix) - 1]
   2869     return os.path.splitext(filename)[0]
   2870 
   2871 
   2872 def _classify_include(filename, include, is_system, include_state):
   2873     """Figures out what kind of header 'include' is.
   2874 
   2875     Args:
   2876       filename: The current file cpp_style is running over.
   2877       include: The path to a #included file.
   2878       is_system: True if the #include used <> rather than "".
   2879       include_state: An _IncludeState instance in which the headers are inserted.
   2880 
   2881     Returns:
   2882       One of the _XXX_HEADER constants.
   2883 
   2884     For example:
   2885       >>> _classify_include('foo.cpp', 'config.h', False)
   2886       _CONFIG_HEADER
   2887       >>> _classify_include('foo.cpp', 'foo.h', False)
   2888       _PRIMARY_HEADER
   2889       >>> _classify_include('foo.cpp', 'bar.h', False)
   2890       _OTHER_HEADER
   2891     """
   2892 
   2893     # If it is a system header we know it is classified as _OTHER_HEADER.
   2894     if is_system and not include.startswith('public/'):
   2895         return _OTHER_HEADER
   2896 
   2897     # If the include is named config.h then this is WebCore/config.h.
   2898     if include == "config.h":
   2899         return _CONFIG_HEADER
   2900 
   2901     # There cannot be primary includes in header files themselves. Only an
   2902     # include exactly matches the header filename will be is flagged as
   2903     # primary, so that it triggers the "don't include yourself" check.
   2904     if filename.endswith('.h') and filename != include:
   2905         return _OTHER_HEADER;
   2906 
   2907     # Qt's moc files do not follow the naming and ordering rules, so they should be skipped
   2908     if include.startswith('moc_') and include.endswith('.cpp'):
   2909         return _MOC_HEADER
   2910 
   2911     if include.endswith('.moc'):
   2912         return _MOC_HEADER
   2913 
   2914     # If the target file basename starts with the include we're checking
   2915     # then we consider it the primary header.
   2916     target_base = FileInfo(filename).base_name()
   2917     include_base = FileInfo(include).base_name()
   2918 
   2919     # If we haven't encountered a primary header, then be lenient in checking.
   2920     if not include_state.visited_primary_section():
   2921         if target_base.find(include_base) != -1:
   2922             return _PRIMARY_HEADER
   2923         # Qt private APIs use _p.h suffix.
   2924         if include_base.find(target_base) != -1 and include_base.endswith('_p'):
   2925             return _PRIMARY_HEADER
   2926 
   2927     # If we already encountered a primary header, perform a strict comparison.
   2928     # In case the two filename bases are the same then the above lenient check
   2929     # probably was a false positive.
   2930     elif include_state.visited_primary_section() and target_base == include_base:
   2931         if include == "ResourceHandleWin.h":
   2932             # FIXME: Thus far, we've only seen one example of these, but if we
   2933             # start to see more, please consider generalizing this check
   2934             # somehow.
   2935             return _OTHER_HEADER
   2936         return _PRIMARY_HEADER
   2937 
   2938     return _OTHER_HEADER
   2939 
   2940 
   2941 def _does_primary_header_exist(filename):
   2942     """Return a primary header file name for a file, or empty string
   2943     if the file is not source file or primary header does not exist.
   2944     """
   2945     fileinfo = FileInfo(filename)
   2946     if not fileinfo.is_source():
   2947         return False
   2948     primary_header = fileinfo.no_extension() + ".h"
   2949     return os.path.isfile(primary_header)
   2950 
   2951 
   2952 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
   2953     """Check rules that are applicable to #include lines.
   2954 
   2955     Strings on #include lines are NOT removed from elided line, to make
   2956     certain tasks easier. However, to prevent false positives, checks
   2957     applicable to #include lines in CheckLanguage must be put here.
   2958 
   2959     Args:
   2960       filename: The name of the current file.
   2961       file_extension: The current file extension, without the leading dot.
   2962       clean_lines: A CleansedLines instance containing the file.
   2963       line_number: The number of the line to check.
   2964       include_state: An _IncludeState instance in which the headers are inserted.
   2965       error: The function to call with any errors found.
   2966     """
   2967     # FIXME: For readability or as a possible optimization, consider
   2968     #        exiting early here by checking whether the "build/include"
   2969     #        category should be checked for the given filename.  This
   2970     #        may involve having the error handler classes expose a
   2971     #        should_check() method, in addition to the usual __call__
   2972     #        method.
   2973     line = clean_lines.lines[line_number]
   2974 
   2975     matched = _RE_PATTERN_INCLUDE.search(line)
   2976     if not matched:
   2977         return
   2978 
   2979     include = matched.group(2)
   2980     is_system = (matched.group(1) == '<')
   2981 
   2982     # Look for any of the stream classes that are part of standard C++.
   2983     if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   2984         error(line_number, 'readability/streams', 3,
   2985               'Streams are highly discouraged.')
   2986 
   2987     # Look for specific includes to fix.
   2988     if include.startswith('wtf/') and is_system:
   2989         error(line_number, 'build/include', 4,
   2990               'wtf includes should be "wtf/file.h" instead of <wtf/file.h>.')
   2991 
   2992     if filename.find('/chromium/') != -1 and include.startswith('cc/CC'):
   2993         error(line_number, 'build/include', 4,
   2994               'cc includes should be "CCFoo.h" instead of "cc/CCFoo.h".')
   2995 
   2996     duplicate_header = include in include_state
   2997     if duplicate_header:
   2998         error(line_number, 'build/include', 4,
   2999               '"%s" already included at %s:%s' %
   3000               (include, filename, include_state[include]))
   3001     else:
   3002         include_state[include] = line_number
   3003 
   3004     header_type = _classify_include(filename, include, is_system, include_state)
   3005     primary_header_exists = _does_primary_header_exist(filename)
   3006     include_state.header_types[line_number] = header_type
   3007 
   3008     # Only proceed if this isn't a duplicate header.
   3009     if duplicate_header:
   3010         return
   3011 
   3012     # We want to ensure that headers appear in the right order:
   3013     # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
   3014     # 2) for header files: alphabetically sorted
   3015     # The include_state object keeps track of the last type seen
   3016     # and complains if the header types are out of order or missing.
   3017     error_message = include_state.check_next_include_order(header_type,
   3018                                                            file_extension == "h",
   3019                                                            primary_header_exists)
   3020 
   3021     # Check to make sure we have a blank line after primary header.
   3022     if not error_message and header_type == _PRIMARY_HEADER:
   3023          next_line = clean_lines.raw_lines[line_number + 1]
   3024          if not is_blank_line(next_line):
   3025             error(line_number, 'build/include_order', 4,
   3026                   'You should add a blank line after implementation file\'s own header.')
   3027 
   3028     # Check to make sure all headers besides config.h and the primary header are
   3029     # alphabetically sorted. Skip Qt's moc files.
   3030     if not error_message and header_type == _OTHER_HEADER:
   3031          previous_line_number = line_number - 1;
   3032          previous_line = clean_lines.lines[previous_line_number]
   3033          previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   3034          while (not previous_match and previous_line_number > 0
   3035                 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
   3036             previous_line_number -= 1;
   3037             previous_line = clean_lines.lines[previous_line_number]
   3038             previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   3039          if previous_match:
   3040             previous_header_type = include_state.header_types[previous_line_number]
   3041             if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
   3042                 # This type of error is potentially a problem with this line or the previous one,
   3043                 # so if the error is filtered for one line, report it for the next. This is so that
   3044                 # we properly handle patches, for which only modified lines produce errors.
   3045                 if not error(line_number - 1, 'build/include_order', 4, 'Alphabetical sorting problem.'):
   3046                     error(line_number, 'build/include_order', 4, 'Alphabetical sorting problem.')
   3047 
   3048     if error_message:
   3049         if file_extension == 'h':
   3050             error(line_number, 'build/include_order', 4,
   3051                   '%s Should be: alphabetically sorted.' %
   3052                   error_message)
   3053         else:
   3054             error(line_number, 'build/include_order', 4,
   3055                   '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
   3056                   error_message)
   3057 
   3058 
   3059 def check_language(filename, clean_lines, line_number, file_extension, include_state,
   3060                    file_state, error):
   3061     """Checks rules from the 'C++ language rules' section of cppguide.html.
   3062 
   3063     Some of these rules are hard to test (function overloading, using
   3064     uint32 inappropriately), but we do the best we can.
   3065 
   3066     Args:
   3067       filename: The name of the current file.
   3068       clean_lines: A CleansedLines instance containing the file.
   3069       line_number: The number of the line to check.
   3070       file_extension: The extension (without the dot) of the filename.
   3071       include_state: An _IncludeState instance in which the headers are inserted.
   3072       file_state: A _FileState instance which maintains information about
   3073                   the state of things in the file.
   3074       error: The function to call with any errors found.
   3075     """
   3076     # If the line is empty or consists of entirely a comment, no need to
   3077     # check it.
   3078     line = clean_lines.elided[line_number]
   3079     if not line:
   3080         return
   3081 
   3082     matched = _RE_PATTERN_INCLUDE.search(line)
   3083     if matched:
   3084         check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
   3085         return
   3086 
   3087     # FIXME: figure out if they're using default arguments in fn proto.
   3088 
   3089     # Check to see if they're using an conversion function cast.
   3090     # I just try to capture the most common basic types, though there are more.
   3091     # Parameterless conversion functions, such as bool(), are allowed as they are
   3092     # probably a member operator declaration or default constructor.
   3093     matched = search(
   3094         r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   3095     if matched:
   3096         # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   3097         # where type may be float(), int(string), etc.  Without context they are
   3098         # virtually indistinguishable from int(x) casts.
   3099         if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
   3100             error(line_number, 'readability/casting', 4,
   3101                   'Using deprecated casting style.  '
   3102                   'Use static_cast<%s>(...) instead' %
   3103                   matched.group(1))
   3104 
   3105     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   3106                        'static_cast',
   3107                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
   3108                        error)
   3109     # This doesn't catch all cases.  Consider (const char * const)"hello".
   3110     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   3111                        'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   3112 
   3113     # In addition, we look for people taking the address of a cast.  This
   3114     # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   3115     # point where you think.
   3116     if search(
   3117         r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   3118         error(line_number, 'runtime/casting', 4,
   3119               ('Are you taking an address of a cast?  '
   3120                'This is dangerous: could be a temp var.  '
   3121                'Take the address before doing the cast, rather than after'))
   3122 
   3123     # Check for people declaring static/global STL strings at the top level.
   3124     # This is dangerous because the C++ language does not guarantee that
   3125     # globals with constructors are initialized before the first access.
   3126     matched = match(
   3127         r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   3128         line)
   3129     # Make sure it's not a function.
   3130     # Function template specialization looks like: "string foo<Type>(...".
   3131     # Class template definitions look like: "string Foo<Type>::Method(...".
   3132     if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   3133                              matched.group(3)):
   3134         error(line_number, 'runtime/string', 4,
   3135               'For a static/global string constant, use a C style string instead: '
   3136               '"%schar %s[]".' %
   3137               (matched.group(1), matched.group(2)))
   3138 
   3139     # Check that we're not using RTTI outside of testing code.
   3140     if search(r'\bdynamic_cast<', line):
   3141         error(line_number, 'runtime/rtti', 5,
   3142               'Do not use dynamic_cast<>.  If you need to cast within a class '
   3143               "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   3144               'RTTI.')
   3145 
   3146     if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   3147         error(line_number, 'runtime/init', 4,
   3148               'You seem to be initializing a member variable with itself.')
   3149 
   3150     if file_extension == 'h':
   3151         # FIXME: check that 1-arg constructors are explicit.
   3152         #        How to tell it's a constructor?
   3153         #        (handled in check_for_non_standard_constructs for now)
   3154         pass
   3155 
   3156     # Check if people are using the verboten C basic types.  The only exception
   3157     # we regularly allow is "unsigned short port" for port.
   3158     if search(r'\bshort port\b', line):
   3159         if not search(r'\bunsigned short port\b', line):
   3160             error(line_number, 'runtime/int', 4,
   3161                   'Use "unsigned short" for ports, not "short"')
   3162 
   3163     # When snprintf is used, the second argument shouldn't be a literal.
   3164     matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   3165     if matched:
   3166         error(line_number, 'runtime/printf', 3,
   3167               'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   3168               'to snprintf.' % (matched.group(1), matched.group(2)))
   3169 
   3170     # Check if some verboten C functions are being used.
   3171     if search(r'\bsprintf\b', line):
   3172         error(line_number, 'runtime/printf', 5,
   3173               'Never use sprintf.  Use snprintf instead.')
   3174     matched = search(r'\b(strcpy|strcat)\b', line)
   3175     if matched:
   3176         error(line_number, 'runtime/printf', 4,
   3177               'Almost always, snprintf is better than %s' % matched.group(1))
   3178 
   3179     if search(r'\bsscanf\b', line):
   3180         error(line_number, 'runtime/printf', 1,
   3181               'sscanf can be ok, but is slow and can overflow buffers.')
   3182 
   3183     # Check for suspicious usage of "if" like
   3184     # } if (a == b) {
   3185     if search(r'\}\s*if\s*\(', line):
   3186         error(line_number, 'readability/braces', 4,
   3187               'Did you mean "else if"? If not, start a new line for "if".')
   3188 
   3189     # Check for potential format string bugs like printf(foo).
   3190     # We constrain the pattern not to pick things like DocidForPrintf(foo).
   3191     # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   3192     matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
   3193     if matched:
   3194         error(line_number, 'runtime/printf', 4,
   3195               'Potential format string bug. Do %s("%%s", %s) instead.'
   3196               % (matched.group(1), matched.group(2)))
   3197 
   3198     # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   3199     matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   3200     if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
   3201         error(line_number, 'runtime/memset', 4,
   3202               'Did you mean "memset(%s, 0, %s)"?'
   3203               % (matched.group(1), matched.group(2)))
   3204 
   3205     # Detect variable-length arrays.
   3206     matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   3207     if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
   3208         matched.group(3).find(']') == -1):
   3209         # Split the size using space and arithmetic operators as delimiters.
   3210         # If any of the resulting tokens are not compile time constants then
   3211         # report the error.
   3212         tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3))
   3213         is_const = True
   3214         skip_next = False
   3215         for tok in tokens:
   3216             if skip_next:
   3217                 skip_next = False
   3218                 continue
   3219 
   3220             if search(r'sizeof\(.+\)', tok):
   3221                 continue
   3222             if search(r'arraysize\(\w+\)', tok):
   3223                 continue
   3224 
   3225             tok = tok.lstrip('(')
   3226             tok = tok.rstrip(')')
   3227             if not tok:
   3228                 continue
   3229             if match(r'\d+', tok):
   3230                 continue
   3231             if match(r'0[xX][0-9a-fA-F]+', tok):
   3232                 continue
   3233             if match(r'k[A-Z0-9]\w*', tok):
   3234                 continue
   3235             if match(r'(.+::)?k[A-Z0-9]\w*', tok):
   3236                 continue
   3237             if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
   3238                 continue
   3239             # A catch all for tricky sizeof cases, including 'sizeof expression',
   3240             # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   3241             # requires skipping the next token becasue we split on ' ' and '*'.
   3242             if tok.startswith('sizeof'):
   3243                 skip_next = True
   3244                 continue
   3245             is_const = False
   3246             break
   3247         if not is_const:
   3248             error(line_number, 'runtime/arrays', 1,
   3249                   'Do not use variable-length arrays.  Use an appropriately named '
   3250                   "('k' followed by CamelCase) compile-time constant for the size.")
   3251 
   3252     # Check for use of unnamed namespaces in header files.  Registration
   3253     # macros are typically OK, so we allow use of "namespace {" on lines
   3254     # that end with backslashes.
   3255     if (file_extension == 'h'
   3256         and search(r'\bnamespace\s*{', line)
   3257         and line[-1] != '\\'):
   3258         error(line_number, 'build/namespaces', 4,
   3259               'Do not use unnamed namespaces in header files.  See '
   3260               'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   3261               ' for more information.')
   3262 
   3263     # Check for plain bitfields declared without either "singed" or "unsigned".
   3264     # Most compilers treat such bitfields as signed, but there are still compilers like
   3265     # RVCT 4.0 that use unsigned by default.
   3266     matched = re.match(r'\s*((const|mutable)\s+)?(char|(short(\s+int)?)|int|long(\s+(long|int))?)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:\s*\d+\s*;', line)
   3267     if matched:
   3268         error(line_number, 'runtime/bitfields', 5,
   3269               'Please declare integral type bitfields with either signed or unsigned.')
   3270 
   3271     check_identifier_name_in_declaration(filename, line_number, line, file_state, error)
   3272 
   3273     # Check for unsigned int (should be just 'unsigned')
   3274     if search(r'\bunsigned int\b', line):
   3275         error(line_number, 'runtime/unsigned', 1,
   3276               'Omit int when using unsigned')
   3277 
   3278     # Check for usage of static_cast<Classname*>.
   3279     check_for_object_static_cast(filename, line_number, line, error)
   3280 
   3281 
   3282 def check_identifier_name_in_declaration(filename, line_number, line, file_state, error):
   3283     """Checks if identifier names contain any underscores.
   3284 
   3285     As identifiers in libraries we are using have a bunch of
   3286     underscores, we only warn about the declarations of identifiers
   3287     and don't check use of identifiers.
   3288 
   3289     Args:
   3290       filename: The name of the current file.
   3291       line_number: The number of the line to check.
   3292       line: The line of code to check.
   3293       file_state: A _FileState instance which maintains information about
   3294                   the state of things in the file.
   3295       error: The function to call with any errors found.
   3296     """
   3297     # We don't check return and delete statements and conversion operator declarations.
   3298     if match(r'\s*(return|delete|operator)\b', line):
   3299         return
   3300 
   3301     # Basically, a declaration is a type name followed by whitespaces
   3302     # followed by an identifier. The type name can be complicated
   3303     # due to type adjectives and templates. We remove them first to
   3304     # simplify the process to find declarations of identifiers.
   3305 
   3306     # Convert "long long", "long double", and "long long int" to
   3307     # simple types, but don't remove simple "long".
   3308     line = sub(r'long (long )?(?=long|double|int)', '', line)
   3309     # Convert unsigned/signed types to simple types, too.
   3310     line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line)
   3311     line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)
   3312 
   3313     # Remove "new" and "new (expr)" to simplify, too.
   3314     line = sub(r'new\s*(\([^)]*\))?', '', line)
   3315 
   3316     # Remove all template parameters by removing matching < and >.
   3317     # Loop until no templates are removed to remove nested templates.
   3318     while True:
   3319         line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
   3320         if not number_of_replacements:
   3321             break
   3322 
   3323     # Declarations of local variables can be in condition expressions
   3324     # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
   3325     # We remove the keywords and the first parenthesis.
   3326     #
   3327     # Declarations in "while", "if", and "switch" are different from
   3328     # other declarations in two aspects:
   3329     #
   3330     # - There can be only one declaration between the parentheses.
   3331     #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
   3332     # - The variable must be initialized.
   3333     #   (i.e., you cannot write "if (int i) {}")
   3334     #
   3335     # and we will need different treatments for them.
   3336     line = sub(r'^\s*for\s*\(', '', line)
   3337     line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)
   3338 
   3339     # Detect variable and functions.
   3340     type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
   3341     identifier_regexp = r'(?P<identifier>[\w:]+)'
   3342     maybe_bitfield_regexp = r'(:\s*\d+\s*)?'
   3343     character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
   3344     declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + maybe_bitfield_regexp + character_after_identifier_regexp
   3345     declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
   3346     is_function_arguments = False
   3347     number_of_identifiers = 0
   3348     while True:
   3349         # If we are seeing the first identifier or arguments of a
   3350         # function, there should be a type name before an identifier.
   3351         if not number_of_identifiers or is_function_arguments:
   3352             declaration_regexp = declaration_with_type_regexp
   3353         else:
   3354             declaration_regexp = declaration_without_type_regexp
   3355 
   3356         matched = match(declaration_regexp, line)
   3357         if not matched:
   3358             return
   3359         identifier = matched.group('identifier')
   3360         character_after_identifier = matched.group('character_after_identifier')
   3361 
   3362         # If we removed a non-for-control statement, the character after
   3363         # the identifier should be '='. With this rule, we can avoid
   3364         # warning for cases like "if (val & INT_MAX) {".
   3365         if control_statement and character_after_identifier != '=':
   3366             return
   3367 
   3368         is_function_arguments = is_function_arguments or character_after_identifier == '('
   3369 
   3370         # Remove "m_" and "s_" to allow them.
   3371         modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
   3372         if not file_state.is_objective_c() and modified_identifier.find('_') >= 0:
   3373             # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
   3374             if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0)
   3375                 and not (filename.find('gtk') >= 0 and modified_identifier.startswith('webkit_') >= 0)
   3376                 and not modified_identifier.startswith('tst_')
   3377                 and not modified_identifier.startswith('webkit_dom_object_')
   3378                 and not modified_identifier.startswith('webkit_soup')
   3379                 and not modified_identifier.startswith('NPN_')
   3380                 and not modified_identifier.startswith('NPP_')
   3381                 and not modified_identifier.startswith('NP_')
   3382                 and not modified_identifier.startswith('qt_')
   3383                 and not modified_identifier.startswith('_q_')
   3384                 and not modified_identifier.startswith('cairo_')
   3385                 and not modified_identifier.startswith('Ecore_')
   3386                 and not modified_identifier.startswith('Eina_')
   3387                 and not modified_identifier.startswith('Evas_')
   3388                 and not modified_identifier.startswith('Ewk_')
   3389                 and not modified_identifier.startswith('cti_')
   3390                 and not modified_identifier.find('::qt_') >= 0
   3391                 and not modified_identifier.find('::_q_') >= 0
   3392                 and not modified_identifier == "const_iterator"
   3393                 and not modified_identifier == "vm_throw"
   3394                 and not modified_identifier == "DFG_OPERATION"):
   3395                 error(line_number, 'readability/naming/underscores', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
   3396 
   3397         # Check for variables named 'l', these are too easy to confuse with '1' in some fonts
   3398         if modified_identifier == 'l':
   3399             error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")
   3400 
   3401         # There can be only one declaration in non-for-control statements.
   3402         if control_statement:
   3403             return
   3404         # We should continue checking if this is a function
   3405         # declaration because we need to check its arguments.
   3406         # Also, we need to check multiple declarations.
   3407         if character_after_identifier != '(' and character_after_identifier != ',':
   3408             return
   3409 
   3410         number_of_identifiers += 1
   3411         line = line[matched.end():]
   3412 
   3413 
   3414 def check_for_toFoo_definition(filename, pattern, error):
   3415     """ Reports for using static_cast instead of toFoo convenience function.
   3416 
   3417     This function will output warnings to make sure you are actually using
   3418     the added toFoo conversion functions rather than directly hard coding
   3419     the static_cast<Classname*> call. For example, you should toHTMLELement(Node*)
   3420     to convert Node* to HTMLElement*, instead of static_cast<HTMLElement*>(Node*)
   3421 
   3422     Args:
   3423       filename: The name of the header file in which to check for toFoo definition.
   3424       pattern: The conversion function pattern to grep for.
   3425       error: The function to call with any errors found.
   3426     """
   3427     def get_abs_filepath(filename):
   3428         fileSystem = FileSystem()
   3429         base_dir = fileSystem.path_to_module(FileSystem.__module__).split('WebKit', 1)[0]
   3430         base_dir = ''.join((base_dir, 'WebKit/Source'))
   3431         for root, dirs, names in os.walk(base_dir):
   3432             if filename in names:
   3433                 return os.path.join(root, filename)
   3434         return None
   3435 
   3436     def grep(lines, pattern, error):
   3437         matches = []
   3438         function_state = None
   3439         for line_number in xrange(lines.num_lines()):
   3440             line = (lines.elided[line_number]).rstrip()
   3441             try:
   3442                 if pattern in line:
   3443                     if not function_state:
   3444                         function_state = _FunctionState(1)
   3445                     detect_functions(lines, line_number, function_state, error)
   3446                     # Exclude the match of dummy conversion function. Dummy function is just to
   3447                     # catch invalid conversions and shouldn't be part of possible alternatives.
   3448                     result = re.search(r'%s(\s+)%s' % ("void", pattern), line)
   3449                     if not result:
   3450                         matches.append([line, function_state.body_start_position.row, function_state.end_position.row + 1])
   3451                         function_state = None
   3452             except UnicodeDecodeError:
   3453                 # There would be no non-ascii characters in the codebase ever. The only exception
   3454                 # would be comments/copyright text which might have non-ascii characters. Hence,
   3455                 # it is prefectly safe to catch the UnicodeDecodeError and just pass the line.
   3456                 pass
   3457 
   3458         return matches
   3459 
   3460     def check_in_mock_header(filename, matches=None):
   3461         if not filename == 'Foo.h':
   3462             return False
   3463 
   3464         header_file = None
   3465         try:
   3466             header_file = CppChecker.fs.read_text_file(filename)
   3467         except IOError:
   3468             return False
   3469         line_number = 0
   3470         for line in header_file:
   3471             line_number += 1
   3472             matched = re.search(r'\btoFoo\b', line)
   3473             if matched:
   3474                 matches.append(['toFoo', line_number, line_number + 3])
   3475         return True
   3476 
   3477     # For unit testing only, avoid header search and lookup locally.
   3478     matches = []
   3479     mock_def_found = check_in_mock_header(filename, matches)
   3480     if mock_def_found:
   3481         return matches
   3482 
   3483     # Regular style check flow. Search for actual header file & defs.
   3484     file_path = get_abs_filepath(filename)
   3485     if not file_path:
   3486         return None
   3487     try:
   3488         f = open(file_path)
   3489         clean_lines = CleansedLines(f.readlines())
   3490     finally:
   3491         f.close()
   3492 
   3493     # Make a list of all genuine alternatives to static_cast.
   3494     matches = grep(clean_lines, pattern, error)
   3495     return matches
   3496 
   3497 
   3498 def check_for_object_static_cast(processing_file, line_number, line, error):
   3499     """Checks for a Cpp-style static cast on objects by looking for the pattern.
   3500 
   3501     Args:
   3502       processing_file: The name of the processing file.
   3503       line_number: The number of the line to check.
   3504       line: The line of code to check.
   3505       error: The function to call with any errors found.
   3506     """
   3507     matched = search(r'\bstatic_cast<(\s*\w*:?:?\w+\s*\*+\s*)>', line)
   3508     if not matched:
   3509         return
   3510 
   3511     class_name = re.sub('[\*]', '', matched.group(1))
   3512     class_name = class_name.strip()
   3513     # Ignore (for now) when the casting is to void*,
   3514     if class_name == 'void':
   3515         return
   3516 
   3517     namespace_pos = class_name.find(':')
   3518     if not namespace_pos == -1:
   3519         class_name = class_name[namespace_pos + 2:]
   3520 
   3521     header_file = ''.join((class_name, '.h'))
   3522     matches = check_for_toFoo_definition(header_file, ''.join(('to', class_name)), error)
   3523     # Ignore (for now) if not able to find the header where toFoo might be defined.
   3524     # TODO: Handle cases where Classname might be defined in some other header or cpp file.
   3525     if matches is None:
   3526         return
   3527 
   3528     report_error = True
   3529     # Ensure found static_cast instance is not from within toFoo definition itself.
   3530     if (os.path.basename(processing_file) == header_file):
   3531         for item in matches:
   3532             if line_number in range(item[1], item[2]):
   3533                 report_error = False
   3534                 break
   3535 
   3536     if report_error:
   3537         if len(matches):
   3538             # toFoo is defined - enforce using it.
   3539             # TODO: Suggest an appropriate toFoo from the alternatives present in matches.
   3540             error(line_number, 'runtime/casting', 4,
   3541                   'static_cast of class objects is not allowed. Use to%s defined in %s.' %
   3542                   (class_name, header_file))
   3543         else:
   3544             # No toFoo defined - enforce definition & usage.
   3545             # TODO: Automate the generation of toFoo() to avoid any slippages ever.
   3546             error(line_number, 'runtime/casting', 4,
   3547                   'static_cast of class objects is not allowed. Add to%s in %s and use it instead.' %
   3548                   (class_name, header_file))
   3549 
   3550 
   3551 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
   3552                        error):
   3553     """Checks for a C-style cast by looking for the pattern.
   3554 
   3555     This also handles sizeof(type) warnings, due to similarity of content.
   3556 
   3557     Args:
   3558       line_number: The number of the line to check.
   3559       line: The line of code to check.
   3560       raw_line: The raw line of code to check, with comments.
   3561       cast_type: The string for the C++ cast to recommend.  This is either
   3562                  reinterpret_cast or static_cast, depending.
   3563       pattern: The regular expression used to find C-style casts.
   3564       error: The function to call with any errors found.
   3565     """
   3566     matched = search(pattern, line)
   3567     if not matched:
   3568         return
   3569 
   3570     # e.g., sizeof(int)
   3571     sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1])
   3572     if sizeof_match:
   3573         error(line_number, 'runtime/sizeof', 1,
   3574               'Using sizeof(type).  Use sizeof(varname) instead if possible')
   3575         return
   3576 
   3577     remainder = line[matched.end(0):]
   3578 
   3579     # The close paren is for function pointers as arguments to a function.
   3580     # eg, void foo(void (*bar)(int));
   3581     # The semicolon check is a more basic function check; also possibly a
   3582     # function pointer typedef.
   3583     # eg, void foo(int); or void foo(int) const;
   3584     # The equals check is for function pointer assignment.
   3585     # eg, void *(*foo)(int) = ...
   3586     #
   3587     # Right now, this will only catch cases where there's a single argument, and
   3588     # it's unnamed.  It should probably be expanded to check for multiple
   3589     # arguments with some unnamed.
   3590     function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder)
   3591     if function_match:
   3592         if (not function_match.group(3)
   3593             or function_match.group(3) == ';'
   3594             or raw_line.find('/*') < 0):
   3595             error(line_number, 'readability/function', 3,
   3596                   'All parameters should be named in a function')
   3597         return
   3598 
   3599     # At this point, all that should be left is actual casts.
   3600     error(line_number, 'readability/casting', 4,
   3601           'Using C-style cast.  Use %s<%s>(...) instead' %
   3602           (cast_type, matched.group(1)))
   3603 
   3604 
   3605 _HEADERS_CONTAINING_TEMPLATES = (
   3606     ('<deque>', ('deque',)),
   3607     ('<functional>', ('unary_function', 'binary_function',
   3608                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   3609                       'negate',
   3610                       'equal_to', 'not_equal_to', 'greater', 'less',
   3611                       'greater_equal', 'less_equal',
   3612                       'logical_and', 'logical_or', 'logical_not',
   3613                       'unary_negate', 'not1', 'binary_negate', 'not2',
   3614                       'bind1st', 'bind2nd',
   3615                       'pointer_to_unary_function',
   3616                       'pointer_to_binary_function',
   3617                       'ptr_fun',
   3618                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   3619                       'mem_fun_ref_t',
   3620                       'const_mem_fun_t', 'const_mem_fun1_t',
   3621                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   3622                       'mem_fun_ref',
   3623                      )),
   3624     ('<limits>', ('numeric_limits',)),
   3625     ('<list>', ('list',)),
   3626     ('<map>', ('map', 'multimap',)),
   3627     ('<memory>', ('allocator',)),
   3628     ('<queue>', ('queue', 'priority_queue',)),
   3629     ('<set>', ('set', 'multiset',)),
   3630     ('<stack>', ('stack',)),
   3631     ('<string>', ('char_traits', 'basic_string',)),
   3632     ('<utility>', ('pair',)),
   3633     ('<vector>', ('vector',)),
   3634 
   3635     # gcc extensions.
   3636     # Note: std::hash is their hash, ::hash is our hash
   3637     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   3638     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   3639     ('<slist>', ('slist',)),
   3640     )
   3641 
   3642 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
   3643     # We can trust with reasonable confidence that map gives us pair<>, too.
   3644     'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
   3645 }
   3646 
   3647 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   3648 
   3649 _re_pattern_algorithm_header = []
   3650 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   3651                   'transform'):
   3652     # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   3653     # type::max().
   3654     _re_pattern_algorithm_header.append(
   3655         (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   3656          _template,
   3657          '<algorithm>'))
   3658 
   3659 _re_pattern_templates = []
   3660 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   3661     for _template in _templates:
   3662         _re_pattern_templates.append(
   3663             (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   3664              _template + '<>',
   3665              _header))
   3666 
   3667 
   3668 def files_belong_to_same_module(filename_cpp, filename_h):
   3669     """Check if these two filenames belong to the same module.
   3670 
   3671     The concept of a 'module' here is a as follows:
   3672     foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
   3673     same 'module' if they are in the same directory.
   3674     some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   3675     to belong to the same module here.
   3676 
   3677     If the filename_cpp contains a longer path than the filename_h, for example,
   3678     '/absolute/path/to/base/sysinfo.cpp', and this file would include
   3679     'base/sysinfo.h', this function also produces the prefix needed to open the
   3680     header. This is used by the caller of this function to more robustly open the
   3681     header file. We don't have access to the real include paths in this context,
   3682     so we need this guesswork here.
   3683 
   3684     Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
   3685     according to this implementation. Because of this, this function gives
   3686     some false positives. This should be sufficiently rare in practice.
   3687 
   3688     Args:
   3689       filename_cpp: is the path for the .cpp file
   3690       filename_h: is the path for the header path
   3691 
   3692     Returns:
   3693       Tuple with a bool and a string:
   3694       bool: True if filename_cpp and filename_h belong to the same module.
   3695       string: the additional prefix needed to open the header file.
   3696     """
   3697 
   3698     if not filename_cpp.endswith('.cpp'):
   3699         return (False, '')
   3700     filename_cpp = filename_cpp[:-len('.cpp')]
   3701     if filename_cpp.endswith('_unittest'):
   3702         filename_cpp = filename_cpp[:-len('_unittest')]
   3703     elif filename_cpp.endswith('_test'):
   3704         filename_cpp = filename_cpp[:-len('_test')]
   3705     filename_cpp = filename_cpp.replace('/public/', '/')
   3706     filename_cpp = filename_cpp.replace('/internal/', '/')
   3707 
   3708     if not filename_h.endswith('.h'):
   3709         return (False, '')
   3710     filename_h = filename_h[:-len('.h')]
   3711     if filename_h.endswith('-inl'):
   3712         filename_h = filename_h[:-len('-inl')]
   3713     filename_h = filename_h.replace('/public/', '/')
   3714     filename_h = filename_h.replace('/internal/', '/')
   3715 
   3716     files_belong_to_same_module = filename_cpp.endswith(filename_h)
   3717     common_path = ''
   3718     if files_belong_to_same_module:
   3719         common_path = filename_cpp[:-len(filename_h)]
   3720     return files_belong_to_same_module, common_path
   3721 
   3722 
   3723 def update_include_state(filename, include_state):
   3724     """Fill up the include_state with new includes found from the file.
   3725 
   3726     Args:
   3727       filename: the name of the header to read.
   3728       include_state: an _IncludeState instance in which the headers are inserted.
   3729       io: The io factory to use to read the file. Provided for testability.
   3730 
   3731     Returns:
   3732       True if a header was succesfully added. False otherwise.
   3733     """
   3734     header_file = None
   3735     try:
   3736         header_file = CppChecker.fs.read_text_file(filename)
   3737     except IOError:
   3738         return False
   3739     line_number = 0
   3740     for line in header_file:
   3741         line_number += 1
   3742         clean_line = cleanse_comments(line)
   3743         matched = _RE_PATTERN_INCLUDE.search(clean_line)
   3744         if matched:
   3745             include = matched.group(2)
   3746             # The value formatting is cute, but not really used right now.
   3747             # What matters here is that the key is in include_state.
   3748             include_state.setdefault(include, '%s:%d' % (filename, line_number))
   3749     return True
   3750 
   3751 
   3752 def check_for_include_what_you_use(filename, clean_lines, include_state, error):
   3753     """Reports for missing stl includes.
   3754 
   3755     This function will output warnings to make sure you are including the headers
   3756     necessary for the stl containers and functions that you use. We only give one
   3757     reason to include a header. For example, if you use both equal_to<> and
   3758     less<> in a .h file, only one (the latter in the file) of these will be
   3759     reported as a reason to include the <functional>.
   3760 
   3761     Args:
   3762       filename: The name of the current file.
   3763       clean_lines: A CleansedLines instance containing the file.
   3764       include_state: An _IncludeState instance.
   3765       error: The function to call with any errors found.
   3766     """
   3767     required = {}  # A map of header name to line_number and the template entity.
   3768         # Example of required: { '<functional>': (1219, 'less<>') }
   3769 
   3770     for line_number in xrange(clean_lines.num_lines()):
   3771         line = clean_lines.elided[line_number]
   3772         if not line or line[0] == '#':
   3773             continue
   3774 
   3775         # String is special -- it is a non-templatized type in STL.
   3776         if _RE_PATTERN_STRING.search(line):
   3777             required['<string>'] = (line_number, 'string')
   3778 
   3779         for pattern, template, header in _re_pattern_algorithm_header:
   3780             if pattern.search(line):
   3781                 required[header] = (line_number, template)
   3782 
   3783         # The following function is just a speed up, no semantics are changed.
   3784         if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   3785             continue
   3786 
   3787         for pattern, template, header in _re_pattern_templates:
   3788             if pattern.search(line):
   3789                 required[header] = (line_number, template)
   3790 
   3791     # The policy is that if you #include something in foo.h you don't need to
   3792     # include it again in foo.cpp. Here, we will look at possible includes.
   3793     # Let's copy the include_state so it is only messed up within this function.
   3794     include_state = include_state.copy()
   3795 
   3796     # Did we find the header for this file (if any) and succesfully load it?
   3797     header_found = False
   3798 
   3799     # Use the absolute path so that matching works properly.
   3800     abs_filename = os.path.abspath(filename)
   3801 
   3802     # For Emacs's flymake.
   3803     # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
   3804     # by flymake and that file name might end with '_flymake.cpp'. In that case,
   3805     # restore original file name here so that the corresponding header file can be
   3806     # found.
   3807     # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
   3808     # instead of 'foo_flymake.h'
   3809     abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename)
   3810 
   3811     # include_state is modified during iteration, so we iterate over a copy of
   3812     # the keys.
   3813     for header in include_state.keys():  #NOLINT
   3814         (same_module, common_path) = files_belong_to_same_module(abs_filename, header)
   3815         fullpath = common_path + header
   3816         if same_module and update_include_state(fullpath, include_state):
   3817             header_found = True
   3818 
   3819     # If we can't find the header file for a .cpp, assume it's because we don't
   3820     # know where to look. In that case we'll give up as we're not sure they
   3821     # didn't include it in the .h file.
   3822     # FIXME: Do a better job of finding .h files so we are confident that
   3823     #        not having the .h file means there isn't one.
   3824     if filename.endswith('.cpp') and not header_found:
   3825         return
   3826 
   3827     # All the lines have been processed, report the errors found.
   3828     for required_header_unstripped in required:
   3829         template = required[required_header_unstripped][1]
   3830         if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
   3831             headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
   3832             if [True for header in headers if header in include_state]:
   3833                 continue
   3834         if required_header_unstripped.strip('<>"') not in include_state:
   3835             error(required[required_header_unstripped][0],
   3836                   'build/include_what_you_use', 4,
   3837                   'Add #include ' + required_header_unstripped + ' for ' + template)
   3838 
   3839 
   3840 def process_line(filename, file_extension,
   3841                  clean_lines, line, include_state, function_state,
   3842                  class_state, file_state, enum_state, error):
   3843     """Processes a single line in the file.
   3844 
   3845     Args:
   3846       filename: Filename of the file that is being processed.
   3847       file_extension: The extension (dot not included) of the file.
   3848       clean_lines: An array of strings, each representing a line of the file,
   3849                    with comments stripped.
   3850       line: Number of line being processed.
   3851       include_state: An _IncludeState instance in which the headers are inserted.
   3852       function_state: A _FunctionState instance which counts function lines, etc.
   3853       class_state: A _ClassState instance which maintains information about
   3854                    the current stack of nested class declarations being parsed.
   3855       file_state: A _FileState instance which maintains information about
   3856                   the state of things in the file.
   3857       enum_state: A _EnumState instance which maintains an enum declaration
   3858                   state.
   3859       error: A callable to which errors are reported, which takes arguments:
   3860              line number, error level, and message
   3861 
   3862     """
   3863     raw_lines = clean_lines.raw_lines
   3864     detect_functions(clean_lines, line, function_state, error)
   3865     check_for_function_lengths(clean_lines, line, function_state, error)
   3866     if search(r'\bNOLINT\b', raw_lines[line]):  # ignore nolint lines
   3867         return
   3868     if match(r'\s*\b__asm\b', raw_lines[line]):  # Ignore asm lines as they format differently.
   3869         return
   3870     check_function_definition(filename, file_extension, clean_lines, line, function_state, error)
   3871     check_pass_ptr_usage(clean_lines, line, function_state, error)
   3872     check_for_leaky_patterns(clean_lines, line, function_state, error)
   3873     check_for_multiline_comments_and_strings(clean_lines, line, error)
   3874     check_style(clean_lines, line, file_extension, class_state, file_state, enum_state, error)
   3875     check_language(filename, clean_lines, line, file_extension, include_state,
   3876                    file_state, error)
   3877     check_for_non_standard_constructs(clean_lines, line, class_state, error)
   3878     check_posix_threading(clean_lines, line, error)
   3879     check_invalid_increment(clean_lines, line, error)
   3880     check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line, error)
   3881 
   3882 def _process_lines(filename, file_extension, lines, error, min_confidence):
   3883     """Performs lint checks and reports any errors to the given error function.
   3884 
   3885     Args:
   3886       filename: Filename of the file that is being processed.
   3887       file_extension: The extension (dot not included) of the file.
   3888       lines: An array of strings, each representing a line of the file, with the
   3889              last element being empty if the file is termined with a newline.
   3890       error: A callable to which errors are reported, which takes 4 arguments:
   3891     """
   3892     lines = (['// marker so line numbers and indices both start at 1'] + lines +
   3893              ['// marker so line numbers end in a known way'])
   3894 
   3895     include_state = _IncludeState()
   3896     function_state = _FunctionState(min_confidence)
   3897     class_state = _ClassState()
   3898 
   3899     check_for_copyright(lines, error)
   3900 
   3901     if file_extension == 'h':
   3902         check_for_header_guard(filename, lines, error)
   3903 
   3904     remove_multi_line_comments(lines, error)
   3905     clean_lines = CleansedLines(lines)
   3906     file_state = _FileState(clean_lines, file_extension)
   3907     enum_state = _EnumState()
   3908     for line in xrange(clean_lines.num_lines()):
   3909         process_line(filename, file_extension, clean_lines, line,
   3910                      include_state, function_state, class_state, file_state,
   3911                      enum_state, error)
   3912     class_state.check_finished(error)
   3913 
   3914     check_for_include_what_you_use(filename, clean_lines, include_state, error)
   3915 
   3916     # We check here rather than inside process_line so that we see raw
   3917     # lines rather than "cleaned" lines.
   3918     check_for_unicode_replacement_characters(lines, error)
   3919 
   3920     check_for_new_line_at_eof(lines, error)
   3921 
   3922 
   3923 class CppChecker(object):
   3924 
   3925     """Processes C++ lines for checking style."""
   3926 
   3927     # This list is used to--
   3928     #
   3929     # (1) generate an explicit list of all possible categories,
   3930     # (2) unit test that all checked categories have valid names, and
   3931     # (3) unit test that all categories are getting unit tested.
   3932     #
   3933     categories = set([
   3934         'build/class',
   3935         'build/deprecated',
   3936         'build/endif_comment',
   3937         'build/forward_decl',
   3938         'build/header_guard',
   3939         'build/include',
   3940         'build/include_order',
   3941         'build/include_what_you_use',
   3942         'build/namespaces',
   3943         'build/printf_format',
   3944         'build/storage_class',
   3945         'build/using_std',
   3946         'legal/copyright',
   3947         'readability/braces',
   3948         'readability/casting',
   3949         'readability/check',
   3950         'readability/comparison_to_zero',
   3951         'readability/constructors',
   3952         'readability/control_flow',
   3953         'readability/enum_casing',
   3954         'readability/fn_size',
   3955         'readability/function',
   3956         'readability/multiline_comment',
   3957         'readability/multiline_string',
   3958         'readability/parameter_name',
   3959         'readability/naming',
   3960         'readability/naming/underscores',
   3961         'readability/null',
   3962         'readability/pass_ptr',
   3963         'readability/streams',
   3964         'readability/todo',
   3965         'readability/utf8',
   3966         'readability/webkit_export',
   3967         'runtime/arrays',
   3968         'runtime/bitfields',
   3969         'runtime/casting',
   3970         'runtime/ctype_function',
   3971         'runtime/explicit',
   3972         'runtime/init',
   3973         'runtime/int',
   3974         'runtime/invalid_increment',
   3975         'runtime/leaky_pattern',
   3976         'runtime/max_min_macros',
   3977         'runtime/memset',
   3978         'runtime/printf',
   3979         'runtime/printf_format',
   3980         'runtime/references',
   3981         'runtime/rtti',
   3982         'runtime/sizeof',
   3983         'runtime/string',
   3984         'runtime/threadsafe_fn',
   3985         'runtime/unsigned',
   3986         'runtime/virtual',
   3987         'whitespace/blank_line',
   3988         'whitespace/braces',
   3989         'whitespace/comma',
   3990         'whitespace/comments',
   3991         'whitespace/declaration',
   3992         'whitespace/end_of_line',
   3993         'whitespace/ending_newline',
   3994         'whitespace/indent',
   3995         'whitespace/line_length',
   3996         'whitespace/newline',
   3997         'whitespace/operators',
   3998         'whitespace/parens',
   3999         'whitespace/semicolon',
   4000         'whitespace/tab',
   4001         'whitespace/todo',
   4002         ])
   4003 
   4004     fs = None
   4005 
   4006     def __init__(self, file_path, file_extension, handle_style_error,
   4007                  min_confidence, fs=None):
   4008         """Create a CppChecker instance.
   4009 
   4010         Args:
   4011           file_extension: A string that is the file extension, without
   4012                           the leading dot.
   4013 
   4014         """
   4015         self.file_extension = file_extension
   4016         self.file_path = file_path
   4017         self.handle_style_error = handle_style_error
   4018         self.min_confidence = min_confidence
   4019         CppChecker.fs = fs or FileSystem()
   4020 
   4021     # Useful for unit testing.
   4022     def __eq__(self, other):
   4023         """Return whether this CppChecker instance is equal to another."""
   4024         if self.file_extension != other.file_extension:
   4025             return False
   4026         if self.file_path != other.file_path:
   4027             return False
   4028         if self.handle_style_error != other.handle_style_error:
   4029             return False
   4030         if self.min_confidence != other.min_confidence:
   4031             return False
   4032 
   4033         return True
   4034 
   4035     # Useful for unit testing.
   4036     def __ne__(self, other):
   4037         # Python does not automatically deduce __ne__() from __eq__().
   4038         return not self.__eq__(other)
   4039 
   4040     def check(self, lines):
   4041         _process_lines(self.file_path, self.file_extension, lines,
   4042                        self.handle_style_error, self.min_confidence)
   4043 
   4044 
   4045 # FIXME: Remove this function (requires refactoring unit tests).
   4046 def process_file_data(filename, file_extension, lines, error, min_confidence, fs=None):
   4047     checker = CppChecker(filename, file_extension, error, min_confidence, fs)
   4048     checker.check(lines)
   4049