Home | History | Annotate | Download | only in checkers
      1 # -*- coding: utf-8 -*-
      2 #
      3 # Copyright (C) 2009, 2010, 2012 Google Inc. All rights reserved.
      4 # Copyright (C) 2009 Torch Mobile Inc.
      5 # Copyright (C) 2009 Apple Inc. All rights reserved.
      6 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
      7 #
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are
     10 # met:
     11 #
     12 #    * Redistributions of source code must retain the above copyright
     13 # notice, this list of conditions and the following disclaimer.
     14 #    * Redistributions in binary form must reproduce the above
     15 # copyright notice, this list of conditions and the following disclaimer
     16 # in the documentation and/or other materials provided with the
     17 # distribution.
     18 #    * Neither the name of Google Inc. nor the names of its
     19 # contributors may be used to endorse or promote products derived from
     20 # this software without specific prior written permission.
     21 #
     22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     23 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     25 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     26 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     27 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     28 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     32 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33 
     34 # This is the modified version of Google's cpplint. The original code is
     35 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
     36 
     37 """Support for check-webkit-style."""
     38 
     39 import math  # for log
     40 import os
     41 import os.path
     42 import re
     43 import sre_compile
     44 import string
     45 import sys
     46 import unicodedata
     47 
     48 from webkitpy.common.memoized import memoized
     49 from webkitpy.common.system.filesystem import FileSystem
     50 
# Headers that we consider STL headers.
# The names are kept in a frozenset so the collection cannot be modified
# after import. Note that 'exception' is also listed in _CPP_HEADERS.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
     60 
     61 
# Non-STL C++ system headers.
# The names are kept in a frozenset so the collection cannot be modified
# after import. Note that 'exception' is also listed in _STL_HEADERS.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
     78 
     79 
     80 # Assertion macros.  These are defined in base/logging.h and
     81 # testing/base/gunit.h.  Note that the _M versions need to come first
     82 # for substring matching to work.
     83 _CHECK_MACROS = [
     84     'DCHECK', 'CHECK',
     85     'EXPECT_TRUE_M', 'EXPECT_TRUE',
     86     'ASSERT_TRUE_M', 'ASSERT_TRUE',
     87     'EXPECT_FALSE_M', 'EXPECT_FALSE',
     88     'ASSERT_FALSE_M', 'ASSERT_FALSE',
     89     ]
     90 
     91 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
     92 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
     93 
     94 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
     95                         ('>=', 'GE'), ('>', 'GT'),
     96                         ('<=', 'LE'), ('<', 'LT')]:
     97     _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
     98     _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
     99     _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    100     _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    101     _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    102     _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    103 
    104 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    105                             ('>=', 'LT'), ('>', 'LE'),
    106                             ('<=', 'GT'), ('<', 'GE')]:
    107     _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    108     _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    109     _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    110     _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    111 
    112 
# These constants define types of headers for use with
# _IncludeState.check_next_include_order().
_CONFIG_HEADER = 0  # WebCore config.h
_PRIMARY_HEADER = 1  # The header that the file being checked implements.
_OTHER_HEADER = 2  # Any other header.
_MOC_HEADER = 3  # A moc file.
    119 
    120 
    121 # The regexp compilation caching is inlined in all regexp functions for
    122 # performance reasons; factoring it out into a separate function turns out
    123 # to be noticeably expensive.
    124 _regexp_compile_cache = {}
    125 
    126 
    127 def match(pattern, s):
    128     """Matches the string with the pattern, caching the compiled regexp."""
    129     if not pattern in _regexp_compile_cache:
    130         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    131     return _regexp_compile_cache[pattern].match(s)
    132 
    133 
    134 def search(pattern, s):
    135     """Searches the string for the pattern, caching the compiled regexp."""
    136     if not pattern in _regexp_compile_cache:
    137         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    138     return _regexp_compile_cache[pattern].search(s)
    139 
    140 
    141 def sub(pattern, replacement, s):
    142     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    143     if not pattern in _regexp_compile_cache:
    144         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    145     return _regexp_compile_cache[pattern].sub(replacement, s)
    146 
    147 
    148 def subn(pattern, replacement, s):
    149     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    150     if not pattern in _regexp_compile_cache:
    151         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    152     return _regexp_compile_cache[pattern].subn(replacement, s)
    153 
    154 
    155 def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    156     """Returns the string with replacement done.
    157 
    158     Every character in the match is replaced with char.
    159     Due to the iterative nature, pattern should not match char or
    160     there will be an infinite loop.
    161 
    162     Example:
    163       pattern = r'<[^>]>' # template parameters
    164       char_replacement =  '_'
    165       s =     'A<B<C, D>>'
    166       Returns 'A_________'
    167 
    168     Args:
    169       pattern: The regex to match.
    170       char_replacement: The character to put in place of every
    171                         character of the match.
    172       s: The string on which to do the replacements.
    173 
    174     Returns:
    175       True, if the given line is blank.
    176     """
    177     while True:
    178         matched = search(pattern, s)
    179         if not matched:
    180             return s
    181         start_match_index = matched.start(0)
    182         end_match_index = matched.end(0)
    183         match_length = end_match_index - start_match_index
    184         s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:]
    185 
    186 
    187 def _find_in_lines(regex, lines, start_position, not_found_position):
    188     """Does a find starting at start position and going forward until
    189     a match is found.
    190 
    191     Returns the position where the regex started.
    192     """
    193     current_row = start_position.row
    194 
    195     # Start with the given row and trim off everything before what should be matched.
    196     current_line = lines[start_position.row][start_position.column:]
    197     starting_offset = start_position.column
    198     while True:
    199         found_match = search(regex, current_line)
    200         if found_match:
    201             return Position(current_row, starting_offset + found_match.start())
    202 
    203         # A match was not found so continue forward.
    204         current_row += 1
    205         starting_offset = 0
    206         if current_row >= len(lines):
    207             return not_found_position
    208         current_line = lines[current_row]
    209 
    210 def _rfind_in_lines(regex, lines, start_position, not_found_position):
    211     """Does a reverse find starting at start position and going backwards until
    212     a match is found.
    213 
    214     Returns the position where the regex ended.
    215     """
    216     # Put the regex in a group and proceed it with a greedy expression that
    217     # matches anything to ensure that we get the last possible match in a line.
    218     last_in_line_regex = r'.*(' + regex + ')'
    219     current_row = start_position.row
    220 
    221     # Start with the given row and trim off everything past what may be matched.
    222     current_line = lines[start_position.row][:start_position.column]
    223     while True:
    224         found_match = match(last_in_line_regex, current_line)
    225         if found_match:
    226             return Position(current_row, found_match.end(1))
    227 
    228         # A match was not found so continue backward.
    229         current_row -= 1
    230         if current_row < 0:
    231             return not_found_position
    232         current_line = lines[current_row]
    233 
    234 
    235 def _convert_to_lower_with_underscores(text):
    236     """Converts all text strings in camelCase or PascalCase to lowers with underscores."""
    237 
    238     # First add underscores before any capital letter followed by a lower case letter
    239     # as long as it is in a word.
    240     # (This put an underscore before Password but not P and A in WPAPassword).
    241     text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text)
    242 
    243     # Next add underscores before capitals at the end of words if it was
    244     # preceeded by lower case letter or number.
    245     # (This puts an underscore before A in isA but not A in CBA).
    246     text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text)
    247 
    248     # Next add underscores when you have a captial letter which is followed by a capital letter
    249     # but is not proceeded by one. (This puts an underscore before A in 'WordADay').
    250     text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text)
    251 
    252     return text.lower()
    253 
    254 
    255 
    256 def _create_acronym(text):
    257     """Creates an acronym for the given text."""
    258     # Removes all lower case letters except those starting words.
    259     text = sub(r'(?<!\b)[a-z]', '', text)
    260     return text.upper()
    261 
    262 
    263 def up_to_unmatched_closing_paren(s):
    264     """Splits a string into two parts up to first unmatched ')'.
    265 
    266     Args:
    267       s: a string which is a substring of line after '('
    268       (e.g., "a == (b + c))").
    269 
    270     Returns:
    271       A pair of strings (prefix before first unmatched ')',
    272       remainder of s after first unmatched ')'), e.g.,
    273       up_to_unmatched_closing_paren("a == (b + c)) { ")
    274       returns "a == (b + c)", " {".
    275       Returns None, None if there is no unmatched ')'
    276 
    277     """
    278     i = 1
    279     for pos, c in enumerate(s):
    280       if c == '(':
    281         i += 1
    282       elif c == ')':
    283         i -= 1
    284         if i == 0:
    285           return s[:pos], s[pos + 1:]
    286     return None, None
    287 
    288 class _IncludeState(dict):
    289     """Tracks line numbers for includes, and the order in which includes appear.
    290 
    291     As a dict, an _IncludeState object serves as a mapping between include
    292     filename and line number on which that file was included.
    293 
    294     Call check_next_include_order() once for each header in the file, passing
    295     in the type constants defined above. Calls in an illegal order will
    296     raise an _IncludeError with an appropriate error message.
    297 
    298     """
    299     # self._section will move monotonically through this set. If it ever
    300     # needs to move backwards, check_next_include_order will raise an error.
    301     _INITIAL_SECTION = 0
    302     _CONFIG_SECTION = 1
    303     _PRIMARY_SECTION = 2
    304     _OTHER_SECTION = 3
    305 
    306     _TYPE_NAMES = {
    307         _CONFIG_HEADER: 'WebCore config.h',
    308         _PRIMARY_HEADER: 'header this file implements',
    309         _OTHER_HEADER: 'other header',
    310         _MOC_HEADER: 'moc file',
    311         }
    312     _SECTION_NAMES = {
    313         _INITIAL_SECTION: "... nothing.",
    314         _CONFIG_SECTION: "WebCore config.h.",
    315         _PRIMARY_SECTION: 'a header this file implements.',
    316         _OTHER_SECTION: 'other header.',
    317         }
    318 
    319     def __init__(self):
    320         dict.__init__(self)
    321         self._section = self._INITIAL_SECTION
    322         self._visited_primary_section = False
    323         self.header_types = dict();
    324 
    325     def visited_primary_section(self):
    326         return self._visited_primary_section
    327 
    328     def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
    329         """Returns a non-empty error message if the next header is out of order.
    330 
    331         This function also updates the internal state to be ready to check
    332         the next include.
    333 
    334         Args:
    335           header_type: One of the _XXX_HEADER constants defined above.
    336           file_is_header: Whether the file that owns this _IncludeState is itself a header
    337 
    338         Returns:
    339           The empty string if the header is in the right order, or an
    340           error message describing what's wrong.
    341 
    342         """
    343         if header_type == _CONFIG_HEADER and file_is_header:
    344             return 'Header file should not contain WebCore config.h.'
    345         if header_type == _PRIMARY_HEADER and file_is_header:
    346             return 'Header file should not contain itself.'
    347         if header_type == _MOC_HEADER:
    348             return ''
    349 
    350         error_message = ''
    351         if self._section != self._OTHER_SECTION:
    352             before_error_message = ('Found %s before %s' %
    353                                     (self._TYPE_NAMES[header_type],
    354                                      self._SECTION_NAMES[self._section + 1]))
    355         after_error_message = ('Found %s after %s' %
    356                                 (self._TYPE_NAMES[header_type],
    357                                  self._SECTION_NAMES[self._section]))
    358 
    359         if header_type == _CONFIG_HEADER:
    360             if self._section >= self._CONFIG_SECTION:
    361                 error_message = after_error_message
    362             self._section = self._CONFIG_SECTION
    363         elif header_type == _PRIMARY_HEADER:
    364             if self._section >= self._PRIMARY_SECTION:
    365                 error_message = after_error_message
    366             elif self._section < self._CONFIG_SECTION:
    367                 error_message = before_error_message
    368             self._section = self._PRIMARY_SECTION
    369             self._visited_primary_section = True
    370         else:
    371             assert header_type == _OTHER_HEADER
    372             if not file_is_header and self._section < self._PRIMARY_SECTION:
    373                 if primary_header_exists:
    374                     error_message = before_error_message
    375             self._section = self._OTHER_SECTION
    376 
    377         return error_message
    378 
    379 
    380 class Position(object):
    381     """Holds the position of something."""
    382     def __init__(self, row, column):
    383         self.row = row
    384         self.column = column
    385 
    386     def __str__(self):
    387         return '(%s, %s)' % (self.row, self.column)
    388 
    389     def __cmp__(self, other):
    390         return self.row.__cmp__(other.row) or self.column.__cmp__(other.column)
    391 
    392 
    393 class Parameter(object):
    394     """Information about one function parameter."""
    395     def __init__(self, parameter, parameter_name_index, row):
    396         self.type = parameter[:parameter_name_index].strip()
    397         # Remove any initializers from the parameter name (e.g. int i = 5).
    398         self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip()
    399         self.row = row
    400 
    401     @memoized
    402     def lower_with_underscores_name(self):
    403         """Returns the parameter name in the lower with underscores format."""
    404         return _convert_to_lower_with_underscores(self.name)
    405 
    406 
    407 class SingleLineView(object):
    408     """Converts multiple lines into a single line (with line breaks replaced by a
    409        space) to allow for easier searching."""
    410     def __init__(self, lines, start_position, end_position):
    411         """Create a SingleLineView instance.
    412 
    413         Args:
    414           lines: a list of multiple lines to combine into a single line.
    415           start_position: offset within lines of where to start the single line.
    416           end_position: just after where to end (like a slice operation).
    417         """
    418         # Get the rows of interest.
    419         trimmed_lines = lines[start_position.row:end_position.row + 1]
    420 
    421         # Remove the columns on the last line that aren't included.
    422         trimmed_lines[-1] = trimmed_lines[-1][:end_position.column]
    423 
    424         # Remove the columns on the first line that aren't included.
    425         trimmed_lines[0] = trimmed_lines[0][start_position.column:]
    426 
    427         # Create a single line with all of the parameters.
    428         self.single_line = ' '.join(trimmed_lines)
    429 
    430         # Keep the row lengths, so we can calculate the original row number
    431         # given a column in the single line (adding 1 due to the space added
    432         # during the join).
    433         self._row_lengths = [len(line) + 1 for line in trimmed_lines]
    434         self._starting_row = start_position.row
    435 
    436     def convert_column_to_row(self, single_line_column_number):
    437         """Convert the column number from the single line into the original
    438         line number.
    439 
    440         Special cases:
    441         * Columns in the added spaces are considered part of the previous line.
    442         * Columns beyond the end of the line are consider part the last line
    443         in the view."""
    444         total_columns = 0
    445         row_offset = 0
    446         while row_offset < len(self._row_lengths) - 1 and single_line_column_number >= total_columns + self._row_lengths[row_offset]:
    447             total_columns += self._row_lengths[row_offset]
    448             row_offset += 1
    449         return self._starting_row + row_offset
    450 
    451 
    452 def create_skeleton_parameters(all_parameters):
    453     """Converts a parameter list to a skeleton version.
    454 
    455     The skeleton only has one word for the parameter name, one word for the type,
    456     and commas after each parameter and only there. Everything in the skeleton
    457     remains in the same columns as the original."""
    458     all_simplifications = (
    459         # Remove template parameters, function declaration parameters, etc.
    460         r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
    461         # Remove all initializers.
    462         r'=[^,]*',
    463         # Remove :: and everything before it.
    464         r'[^,]*::',
    465         # Remove modifiers like &, *.
    466         r'[&*]',
    467         # Remove const modifiers.
    468         r'\bconst\s+(?=[A-Za-z])',
    469         # Remove numerical modifiers like long.
    470         r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')
    471 
    472     skeleton_parameters = all_parameters
    473     for simplification in all_simplifications:
    474         skeleton_parameters = iteratively_replace_matches_with_char(simplification, ' ', skeleton_parameters)
    475     # If there are any parameters, then add a , after the last one to
    476     # make a regular pattern of a , following every parameter.
    477     if skeleton_parameters.strip():
    478         skeleton_parameters += ','
    479     return skeleton_parameters
    480 
    481 
    482 def find_parameter_name_index(skeleton_parameter):
    483     """Determines where the parametere name starts given the skeleton parameter."""
    484     # The first space from the right in the simplified parameter is where the parameter
    485     # name starts unless the first space is before any content in the simplified parameter.
    486     before_name_index = skeleton_parameter.rstrip().rfind(' ')
    487     if before_name_index != -1 and skeleton_parameter[:before_name_index].strip():
    488         return before_name_index + 1
    489     return len(skeleton_parameter)
    490 
    491 
    492 def parameter_list(elided_lines, start_position, end_position):
    493     """Generator for a function's parameters."""
    494     # Create new positions that omit the outer parenthesis of the parameters.
    495     start_position = Position(row=start_position.row, column=start_position.column + 1)
    496     end_position = Position(row=end_position.row, column=end_position.column - 1)
    497     single_line_view = SingleLineView(elided_lines, start_position, end_position)
    498     skeleton_parameters = create_skeleton_parameters(single_line_view.single_line)
    499     end_index = -1
    500 
    501     while True:
    502         # Find the end of the next parameter.
    503         start_index = end_index + 1
    504         end_index = skeleton_parameters.find(',', start_index)
    505 
    506         # No comma means that all parameters have been parsed.
    507         if end_index == -1:
    508             return
    509         row = single_line_view.convert_column_to_row(end_index)
    510 
    511         # Parse the parameter into a type and parameter name.
    512         skeleton_parameter = skeleton_parameters[start_index:end_index]
    513         name_offset = find_parameter_name_index(skeleton_parameter)
    514         parameter = single_line_view.single_line[start_index:end_index]
    515         yield Parameter(parameter, name_offset, row)
    516 
    517 
    518 class _FunctionState(object):
    519     """Tracks current function name and the number of lines in its body.
    520 
    521     Attributes:
    522       min_confidence: The minimum confidence level to use while checking style.
    523 
    524     """
    525 
    526     _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    527     _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    528 
    529     def __init__(self, min_confidence):
    530         self.min_confidence = min_confidence
    531         self.current_function = ''
    532         self.in_a_function = False
    533         self.lines_in_function = 0
    534         # Make sure these will not be mistaken for real positions (even when a
    535         # small amount is added to them).
    536         self.body_start_position = Position(-1000, 0)
    537         self.end_position = Position(-1000, 0)
    538 
    539     def begin(self, function_name, function_name_start_position, body_start_position, end_position,
    540               parameter_start_position, parameter_end_position, clean_lines):
    541         """Start analyzing function body.
    542 
    543         Args:
    544             function_name: The name of the function being tracked.
    545             function_name_start_position: Position in elided where the function name starts.
    546             body_start_position: Position in elided of the { or the ; for a prototype.
    547             end_position: Position in elided just after the final } (or ; is.
    548             parameter_start_position: Position in elided of the '(' for the parameters.
    549             parameter_end_position: Position in elided just after the ')' for the parameters.
    550             clean_lines: A CleansedLines instance containing the file.
    551         """
    552         self.in_a_function = True
    553         self.lines_in_function = -1  # Don't count the open brace line.
    554         self.current_function = function_name
    555         self.function_name_start_position = function_name_start_position
    556         self.body_start_position = body_start_position
    557         self.end_position = end_position
    558         self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
    559         self.parameter_start_position = parameter_start_position
    560         self.parameter_end_position = parameter_end_position
    561         self.is_pure = False
    562         if self.is_declaration:
    563             characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
    564             self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters))
    565         self._clean_lines = clean_lines
    566         self._parameter_list = None
    567 
    568     def modifiers_and_return_type(self):
    569         """Returns the modifiers and the return type."""
    570         # Go backwards from where the function name is until we encounter one of several things:
    571         #   ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0)
    572         elided = self._clean_lines.elided
    573         start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
    574                                           elided, self.parameter_start_position, Position(0, 0))
    575         return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip()
    576 
    577     def parameter_list(self):
    578         if not self._parameter_list:
    579             # Store the final result as a tuple since that is immutable.
    580             self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position))
    581 
    582         return self._parameter_list
    583 
    584     def count(self, line_number):
    585         """Count line in current function body."""
    586         if self.in_a_function and line_number >= self.body_start_position.row:
    587             self.lines_in_function += 1
    588 
    589     def check(self, error, line_number):
    590         """Report if too many lines in function body.
    591 
    592         Args:
    593           error: The function to call with any errors found.
    594           line_number: The number of the line to check.
    595         """
    596         if match(r'T(EST|est)', self.current_function):
    597             base_trigger = self._TEST_TRIGGER
    598         else:
    599             base_trigger = self._NORMAL_TRIGGER
    600         trigger = base_trigger * 2 ** self.min_confidence
    601 
    602         if self.lines_in_function > trigger:
    603             error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    604             # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    605             if error_level > 5:
    606                 error_level = 5
    607             error(line_number, 'readability/fn_size', error_level,
    608                   'Small and focused functions are preferred:'
    609                   ' %s has %d non-comment lines'
    610                   ' (error triggered by exceeding %d lines).'  % (
    611                       self.current_function, self.lines_in_function, trigger))
    612 
    613     def end(self):
    614         """Stop analyzing function body."""
    615         self.in_a_function = False
    616 
    617 
    618 class _IncludeError(Exception):
    619     """Indicates a problem with the include order in a file."""
    620     pass
    621 
    622 
    623 class FileInfo:
    624     """Provides utility functions for filenames.
    625 
    626     FileInfo provides easy access to the components of a file's path
    627     relative to the project root.
    628     """
    629 
    def __init__(self, filename):
        """Stores the filename that the other methods operate on.

        Args:
          filename: Path of the file; it is kept exactly as given.
        """
        self._filename = filename
    632 
    633     def full_name(self):
    634         """Make Windows paths like Unix."""
    635         return os.path.abspath(self._filename).replace('\\', '/')
    636 
    637     def repository_name(self):
    638         """Full name after removing the local path to the repository.
    639 
    640         If we have a real absolute path name here we can try to do something smart:
    641         detecting the root of the checkout and truncating /path/to/checkout from
    642         the name so that we get header guards that don't include things like
    643         "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    644         people on different computers who have checked the source out to different
    645         locations won't see bogus errors.
    646         """
    647         fullname = self.full_name()
    648 
    649         if os.path.exists(fullname):
    650             project_dir = os.path.dirname(fullname)
    651 
    652             if os.path.exists(os.path.join(project_dir, ".svn")):
    653                 # If there's a .svn file in the current directory, we
    654                 # recursively look up the directory tree for the top
    655                 # of the SVN checkout
    656                 root_dir = project_dir
    657                 one_up_dir = os.path.dirname(root_dir)
    658                 while os.path.exists(os.path.join(one_up_dir, ".svn")):
    659                     root_dir = os.path.dirname(root_dir)
    660                     one_up_dir = os.path.dirname(one_up_dir)
    661 
    662                 prefix = os.path.commonprefix([root_dir, project_dir])
    663                 return fullname[len(prefix) + 1:]
    664 
    665             # Not SVN? Try to find a git top level directory by
    666             # searching up from the current path.
    667             root_dir = os.path.dirname(fullname)
    668             while (root_dir != os.path.dirname(root_dir)
    669                    and not os.path.exists(os.path.join(root_dir, ".git"))):
    670                 root_dir = os.path.dirname(root_dir)
    671                 if os.path.exists(os.path.join(root_dir, ".git")):
    672                     prefix = os.path.commonprefix([root_dir, project_dir])
    673                     return fullname[len(prefix) + 1:]
    674 
    675         # Don't know what to do; header guard warnings may be wrong...
    676         return fullname
    677 
    678     def split(self):
    679         """Splits the file into the directory, basename, and extension.
    680 
    681         For 'chrome/browser/browser.cpp', Split() would
    682         return ('chrome/browser', 'browser', '.cpp')
    683 
    684         Returns:
    685           A tuple of (directory, basename, extension).
    686         """
    687 
    688         googlename = self.repository_name()
    689         project, rest = os.path.split(googlename)
    690         return (project,) + os.path.splitext(rest)
    691 
    692     def base_name(self):
    693         """File base name - text after the final slash, before the final period."""
    694         return self.split()[1]
    695 
    696     def extension(self):
    697         """File extension - text following the final period."""
    698         return self.split()[2]
    699 
    700     def no_extension(self):
    701         """File has no source file extension."""
    702         return '/'.join(self.split()[0:2])
    703 
    704     def is_source(self):
    705         """File has a source file extension."""
    706         return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    707 
    708 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' and a run of hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little bit more complicated than one might expect, because we
# have to take care of removing the surrounding spaces so we can handle
# comments inside statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    729 
    730 
    731 def is_cpp_string(line):
    732     """Does line terminate so, that the next symbol is in string constant.
    733 
    734     This function does not consider single-line nor multi-line comments.
    735 
    736     Args:
    737       line: is a partial line of code starting from the 0..n.
    738 
    739     Returns:
    740       True, if next character appended to 'line' is inside a
    741       string constant.
    742     """
    743 
    744     line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    745     return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    746 
    747 
    748 def find_next_multi_line_comment_start(lines, line_index):
    749     """Find the beginning marker for a multiline comment."""
    750     while line_index < len(lines):
    751         if lines[line_index].strip().startswith('/*'):
    752             # Only return this marker if the comment goes beyond this line
    753             if lines[line_index].strip().find('*/', 2) < 0:
    754                 return line_index
    755         line_index += 1
    756     return len(lines)
    757 
    758 
    759 def find_next_multi_line_comment_end(lines, line_index):
    760     """We are inside a comment, find the end marker."""
    761     while line_index < len(lines):
    762         if lines[line_index].strip().endswith('*/'):
    763             return line_index
    764         line_index += 1
    765     return len(lines)
    766 
    767 
    768 def remove_multi_line_comments_from_range(lines, begin, end):
    769     """Clears a range of lines for multi-line comments."""
    770     # Having // dummy comments makes the lines non-empty, so we will not get
    771     # unnecessary blank line warnings later in the code.
    772     for i in range(begin, end):
    773         lines[i] = '// dummy'
    774 
    775 
    776 def remove_multi_line_comments(lines, error):
    777     """Removes multiline (c-style) comments from lines."""
    778     line_index = 0
    779     while line_index < len(lines):
    780         line_index_begin = find_next_multi_line_comment_start(lines, line_index)
    781         if line_index_begin >= len(lines):
    782             return
    783         line_index_end = find_next_multi_line_comment_end(lines, line_index_begin)
    784         if line_index_end >= len(lines):
    785             error(line_index_begin + 1, 'readability/multiline_comment', 5,
    786                   'Could not find end of multi-line comment')
    787             return
    788         remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1)
    789         line_index = line_index_end + 1
    790 
    791 
    792 def cleanse_comments(line):
    793     """Removes //-comments and single-line C-style /* */ comments.
    794 
    795     Args:
    796       line: A line of C++ source.
    797 
    798     Returns:
    799       The line with single-line comments removed.
    800     """
    801     comment_position = line.find('//')
    802     if comment_position != -1 and not is_cpp_string(line[:comment_position]):
    803         line = line[:comment_position]
    804     # get rid of /* ... */
    805     return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
    806 
    807 
    808 class CleansedLines(object):
    809     """Holds 3 copies of all lines with different preprocessing applied to them.
    810 
    811     1) elided member contains lines without strings and comments,
    812     2) lines member contains lines without comments, and
    813     3) raw member contains all the lines without processing.
    814     All these three members are of <type 'list'>, and of the same length.
    815     """
    816 
    817     def __init__(self, lines):
    818         self.elided = []
    819         self.lines = []
    820         self.raw_lines = lines
    821         self._num_lines = len(lines)
    822         for line_number in range(len(lines)):
    823             self.lines.append(cleanse_comments(lines[line_number]))
    824             elided = self.collapse_strings(lines[line_number])
    825             self.elided.append(cleanse_comments(elided))
    826 
    827     def num_lines(self):
    828         """Returns the number of lines represented."""
    829         return self._num_lines
    830 
    831     @staticmethod
    832     def collapse_strings(elided):
    833         """Collapses strings and chars on a line to simple "" or '' blocks.
    834 
    835         We nix strings first so we're not fooled by text like '"http://"'
    836 
    837         Args:
    838           elided: The line being processed.
    839 
    840         Returns:
    841           The line with collapsed strings.
    842         """
    843         if not _RE_PATTERN_INCLUDE.match(elided):
    844             # Remove escaped characters first to make quote/single quote collapsing
    845             # basic.  Things that look like escaped characters shouldn't occur
    846             # outside of strings and chars.
    847             elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    848             elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
    849             elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    850         return elided
    851 
    852 
    853 def close_expression(elided, position):
    854     """If input points to ( or { or [, finds the position that closes it.
    855 
    856     If elided[position.row][position.column] points to a '(' or '{' or '[',
    857     finds the line_number/pos that correspond to the closing of the expression.
    858 
    859      Args:
    860        elided: A CleansedLines.elided instance containing the file.
    861        position: The position of the opening item.
    862 
    863      Returns:
    864       The Position *past* the closing brace, or Position(len(elided), -1)
    865       if we never find a close. Note we ignore strings and comments when matching.
    866     """
    867     line = elided[position.row]
    868     start_character = line[position.column]
    869     if start_character == '(':
    870         enclosing_character_regex = r'[\(\)]'
    871     elif start_character == '[':
    872         enclosing_character_regex = r'[\[\]]'
    873     elif start_character == '{':
    874         enclosing_character_regex = r'[\{\}]'
    875     else:
    876         return Position(len(elided), -1)
    877 
    878     current_column = position.column + 1
    879     line_number = position.row
    880     net_open = 1
    881     for line in elided[position.row:]:
    882         line = line[current_column:]
    883 
    884         # Search the current line for opening and closing characters.
    885         while True:
    886             next_enclosing_character = search(enclosing_character_regex, line)
    887             # No more on this line.
    888             if not next_enclosing_character:
    889                 break
    890             current_column += next_enclosing_character.end(0)
    891             line = line[next_enclosing_character.end(0):]
    892             if next_enclosing_character.group(0) == start_character:
    893                 net_open += 1
    894             else:
    895                 net_open -= 1
    896                 if not net_open:
    897                     return Position(line_number, current_column)
    898 
    899         # Proceed to the next line.
    900         line_number += 1
    901         current_column = 0
    902 
    903     # The given item was not closed.
    904     return Position(len(elided), -1)
    905 
    906 def check_for_copyright(lines, error):
    907     """Logs an error if no Copyright message appears at the top of the file."""
    908 
    909     # We'll say it should occur by line 10. Don't forget there's a
    910     # dummy line at the front.
    911     for line in xrange(1, min(len(lines), 11)):
    912         if re.search(r'Copyright', lines[line], re.I):
    913             break
    914     else:                       # means no copyright line was found
    915         error(0, 'legal/copyright', 5,
    916               'No copyright message found.  '
    917               'You should have a line: "Copyright [year] <Copyright Owner>"')
    918 
    919 
    920 # TODO(jww) After the transition of Blink into the Chromium repo, this function
    921 # should be removed. This will strictly enforce Chromium-style header guards,
    922 # rather than allowing traditional WebKit header guards and Chromium-style
    923 # simultaneously.
    924 def get_legacy_header_guard_cpp_variable(filename):
    925     """Returns the CPP variable that should be used as a header guard.
    926 
    927     Args:
    928       filename: The name of a C++ header file.
    929 
    930     Returns:
    931       The CPP variable that should be used as a header guard in the
    932       named file.
    933 
    934     """
    935 
    936     # Restores original filename in case that style checker is invoked from Emacs's
    937     # flymake.
    938     filename = re.sub(r'_flymake\.h$', '.h', filename)
    939 
    940     standard_name = sub(r'[-.\s]', '_', os.path.basename(filename))
    941 
    942     # Files under WTF typically have header guards that start with WTF_.
    943     if '/wtf/' in filename:
    944         special_name = "WTF_" + standard_name
    945     else:
    946         special_name = standard_name
    947     return (special_name, standard_name)
    948 
    949 
    950 def get_header_guard_cpp_variable(filename):
    951     """Returns the CPP variable that should be used as a header guard in Chromium-style.
    952 
    953     Args:
    954       filename: The name of a C++ header file.
    955 
    956     Returns:
    957       The CPP variable that should be used as a header guard in the
    958       named file in Chromium-style.
    959 
    960     """
    961 
    962     # Restores original filename in case that style checker is invoked from Emacs's
    963     # flymake.
    964     filename = re.sub(r'_flymake\.h$', '.h', filename)
    965 
    966     # If it's a full path and starts with Source/, replace Source with blink
    967     # since that will be the new style directory.
    968     filename = sub(r'^Source\/', 'blink/', filename)
    969 
    970     standard_name = sub(r'[-.\s\/]', '_', filename).upper() + '_'
    971 
    972     return standard_name
    973 
    974 
    975 def check_for_header_guard(filename, lines, error):
    976     """Checks that the file contains a header guard.
    977 
    978     Logs an error if no #ifndef header guard is present.  For other
    979     headers, checks that the full pathname is used.
    980 
    981     Args:
    982       filename: The name of the C++ header file.
    983       lines: An array of strings, each representing a line of the file.
    984       error: The function to call with any errors found.
    985     """
    986 
    987     legacy_cpp_var = get_legacy_header_guard_cpp_variable(filename)
    988     cpp_var = get_header_guard_cpp_variable(filename)
    989 
    990     ifndef = None
    991     ifndef_line_number = 0
    992     define = None
    993     for line_number, line in enumerate(lines):
    994         line_split = line.split()
    995         if len(line_split) >= 2:
    996             # find the first occurrence of #ifndef and #define, save arg
    997             if not ifndef and line_split[0] == '#ifndef':
    998                 # set ifndef to the header guard presented on the #ifndef line.
    999                 ifndef = line_split[1]
   1000                 ifndef_line_number = line_number
   1001             if not define and line_split[0] == '#define':
   1002                 define = line_split[1]
   1003             if define and ifndef:
   1004                 break
   1005 
   1006     if not ifndef or not define or ifndef != define:
   1007         error(0, 'build/header_guard', 5,
   1008               'No #ifndef header guard found, suggested CPP variable is: %s' %
   1009               legacy_cpp_var[0])
   1010         return
   1011 
   1012     # The guard should be File_h or, for Chromium style, BLINK_PATH_TO_FILE_H_.
   1013     if ifndef not in legacy_cpp_var and ifndef != cpp_var:
   1014         error(ifndef_line_number, 'build/header_guard', 5,
   1015               '#ifndef header guard has wrong style, please use: %s' % legacy_cpp_var[0])
   1016 
   1017 
   1018 def check_for_unicode_replacement_characters(lines, error):
   1019     """Logs an error for each line containing Unicode replacement characters.
   1020 
   1021     These indicate that either the file contained invalid UTF-8 (likely)
   1022     or Unicode replacement characters (which it shouldn't).  Note that
   1023     it's possible for this to throw off line numbering if the invalid
   1024     UTF-8 occurred adjacent to a newline.
   1025 
   1026     Args:
   1027       lines: An array of strings, each representing a line of the file.
   1028       error: The function to call with any errors found.
   1029     """
   1030     for line_number, line in enumerate(lines):
   1031         if u'\ufffd' in line:
   1032             error(line_number, 'readability/utf8', 5,
   1033                   'Line contains invalid UTF-8 (or Unicode replacement character).')
   1034 
   1035 
   1036 def check_for_new_line_at_eof(lines, error):
   1037     """Logs an error if there is no newline char at the end of the file.
   1038 
   1039     Args:
   1040       lines: An array of strings, each representing a line of the file.
   1041       error: The function to call with any errors found.
   1042     """
   1043 
   1044     # The array lines() was created by adding two newlines to the
   1045     # original file (go figure), then splitting on \n.
   1046     # To verify that the file ends in \n, we just have to make sure the
   1047     # last-but-two element of lines() exists and is empty.
   1048     if len(lines) < 3 or lines[-2]:
   1049         error(len(lines) - 2, 'whitespace/ending_newline', 5,
   1050               'Could not find a newline character at the end of the file.')
   1051 
   1052 
   1053 def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
   1054     """Logs an error if we see /* ... */ or "..." that extend past one line.
   1055 
   1056     /* ... */ comments are legit inside macros, for one line.
   1057     Otherwise, we prefer // comments, so it's ok to warn about the
   1058     other.  Likewise, it's ok for strings to extend across multiple
   1059     lines, as long as a line continuation character (backslash)
   1060     terminates each line. Although not currently prohibited by the C++
   1061     style guide, it's ugly and unnecessary. We don't do well with either
   1062     in this lint program, so we warn about both.
   1063 
   1064     Args:
   1065       clean_lines: A CleansedLines instance containing the file.
   1066       line_number: The number of the line to check.
   1067       error: The function to call with any errors found.
   1068     """
   1069     line = clean_lines.elided[line_number]
   1070 
   1071     # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1072     # second (escaped) slash may trigger later \" detection erroneously.
   1073     line = line.replace('\\\\', '')
   1074 
   1075     if line.count('/*') > line.count('*/'):
   1076         error(line_number, 'readability/multiline_comment', 5,
   1077               'Complex multi-line /*...*/-style comment found. '
   1078               'Lint may give bogus warnings.  '
   1079               'Consider replacing these with //-style comments, '
   1080               'with #if 0...#endif, '
   1081               'or with more clearly structured multi-line comments.')
   1082 
   1083     if (line.count('"') - line.count('\\"')) % 2:
   1084         error(line_number, 'readability/multiline_string', 5,
   1085               'Multi-line string ("...") found.  This lint script doesn\'t '
   1086               'do well with such strings, and may give bogus warnings.  They\'re '
   1087               'ugly and unnecessary, and you should use concatenation instead".')
   1088 
   1089 
   1090 _THREADING_LIST = (
   1091     ('asctime(', 'asctime_r('),
   1092     ('ctime(', 'ctime_r('),
   1093     ('getgrgid(', 'getgrgid_r('),
   1094     ('getgrnam(', 'getgrnam_r('),
   1095     ('getlogin(', 'getlogin_r('),
   1096     ('getpwnam(', 'getpwnam_r('),
   1097     ('getpwuid(', 'getpwuid_r('),
   1098     ('gmtime(', 'gmtime_r('),
   1099     ('localtime(', 'localtime_r('),
   1100     ('rand(', 'rand_r('),
   1101     ('readdir(', 'readdir_r('),
   1102     ('strtok(', 'strtok_r('),
   1103     ('ttyname(', 'ttyname_r('),
   1104     )
   1105 
   1106 
   1107 def check_posix_threading(clean_lines, line_number, error):
   1108     """Checks for calls to thread-unsafe functions.
   1109 
   1110     Much code has been originally written without consideration of
   1111     multi-threading. Also, engineers are relying on their old experience;
   1112     they have learned posix before threading extensions were added. These
   1113     tests guide the engineers to use thread-safe functions (when using
   1114     posix directly).
   1115 
   1116     Args:
   1117       clean_lines: A CleansedLines instance containing the file.
   1118       line_number: The number of the line to check.
   1119       error: The function to call with any errors found.
   1120     """
   1121     line = clean_lines.elided[line_number]
   1122     for single_thread_function, multithread_safe_function in _THREADING_LIST:
   1123         index = line.find(single_thread_function)
   1124         # Comparisons made explicit for clarity
   1125         if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
   1126                                           and line[index - 1] not in ('_', '.', '>'))):
   1127             error(line_number, 'runtime/threadsafe_fn', 2,
   1128                   'Consider using ' + multithread_safe_function +
   1129                   '...) instead of ' + single_thread_function +
   1130                   '...) for improved thread safety.')
   1131 
   1132 
# Matches invalid increment: *count++, which moves pointer instead of
# incrementing a value. The pattern is anchored at the start of the line
# (after optional whitespace) and requires the ++ or -- to be followed
# directly by a semicolon.
_RE_PATTERN_INVALID_INCREMENT = re.compile(
    r'^\s*\*\w+(\+\+|--);')
   1137 
   1138 
   1139 def check_invalid_increment(clean_lines, line_number, error):
   1140     """Checks for invalid increment *count++.
   1141 
   1142     For example following function:
   1143     void increment_counter(int* count) {
   1144         *count++;
   1145     }
   1146     is invalid, because it effectively does count++, moving pointer, and should
   1147     be replaced with ++*count, (*count)++ or *count += 1.
   1148 
   1149     Args:
   1150       clean_lines: A CleansedLines instance containing the file.
   1151       line_number: The number of the line to check.
   1152       error: The function to call with any errors found.
   1153     """
   1154     line = clean_lines.elided[line_number]
   1155     if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1156         error(line_number, 'runtime/invalid_increment', 5,
   1157               'Changing pointer instead of value (or unused value of operator*).')
   1158 
   1159 
   1160 class _ClassInfo(object):
   1161     """Stores information about a class."""
   1162 
   1163     def __init__(self, name, line_number):
   1164         self.name = name
   1165         self.line_number = line_number
   1166         self.seen_open_brace = False
   1167         self.is_derived = False
   1168         self.virtual_method_line_number = None
   1169         self.has_virtual_destructor = False
   1170         self.brace_depth = 0
   1171         self.unsigned_bitfields = []
   1172         self.bool_bitfields = []
   1173 
   1174 
   1175 class _ClassState(object):
   1176     """Holds the current state of the parse relating to class declarations.
   1177 
   1178     It maintains a stack of _ClassInfos representing the parser's guess
   1179     as to the current nesting of class declarations. The innermost class
   1180     is at the top (back) of the stack. Typically, the stack will either
   1181     be empty or have exactly one entry.
   1182     """
   1183 
   1184     def __init__(self):
   1185         self.classinfo_stack = []
   1186 
   1187     def check_finished(self, error):
   1188         """Checks that all classes have been completely parsed.
   1189 
   1190         Call this when all lines in a file have been processed.
   1191         Args:
   1192           error: The function to call with any errors found.
   1193         """
   1194         if self.classinfo_stack:
   1195             # Note: This test can result in false positives if #ifdef constructs
   1196             # get in the way of brace matching. See the testBuildClass test in
   1197             # cpp_style_unittest.py for an example of this.
   1198             error(self.classinfo_stack[0].line_number, 'build/class', 5,
   1199                   'Failed to find complete declaration of class %s' %
   1200                   self.classinfo_stack[0].name)
   1201 
   1202 
   1203 class _FileState(object):
   1204     def __init__(self, clean_lines, file_extension):
   1205         self._did_inside_namespace_indent_warning = False
   1206         self._clean_lines = clean_lines
   1207         if file_extension in ['m', 'mm']:
   1208             self._is_objective_c = True
   1209             self._is_c = False
   1210         elif file_extension == 'h':
   1211             # In the case of header files, it is unknown if the file
   1212             # is c / objective c or not, so set this value to None and then
   1213             # if it is requested, use heuristics to guess the value.
   1214             self._is_objective_c = None
   1215             self._is_c = None
   1216         elif file_extension == 'c':
   1217             self._is_c = True
   1218             self._is_objective_c = False
   1219         else:
   1220             self._is_objective_c = False
   1221             self._is_c = False
   1222 
   1223     def set_did_inside_namespace_indent_warning(self):
   1224         self._did_inside_namespace_indent_warning = True
   1225 
   1226     def did_inside_namespace_indent_warning(self):
   1227         return self._did_inside_namespace_indent_warning
   1228 
   1229     def is_objective_c(self):
   1230         if self._is_objective_c is None:
   1231             for line in self._clean_lines.elided:
   1232                 # Starting with @ or #import seem like the best indications
   1233                 # that we have an Objective C file.
   1234                 if line.startswith("@") or line.startswith("#import"):
   1235                     self._is_objective_c = True
   1236                     break
   1237             else:
   1238                 self._is_objective_c = False
   1239         return self._is_objective_c
   1240 
   1241     def is_c(self):
   1242         if self._is_c is None:
   1243             for line in self._clean_lines.lines:
   1244                 # if extern "C" is found, then it is a good indication
   1245                 # that we have a C header file.
   1246                 if line.startswith('extern "C"'):
   1247                     self._is_c = True
   1248                     break
   1249             else:
   1250                 self._is_c = False
   1251         return self._is_c
   1252 
   1253     def is_c_or_objective_c(self):
   1254         """Return whether the file extension corresponds to C or Objective-C."""
   1255         return self.is_c() or self.is_objective_c()
   1256 
   1257 
   1258 class _EnumState(object):
   1259     """Maintains whether currently in an enum declaration, and checks whether
   1260     enum declarations follow the style guide.
   1261     """
   1262 
   1263     def __init__(self):
   1264         self.in_enum_decl = False
   1265         self.is_webidl_enum = False
   1266 
   1267     def process_clean_line(self, line):
   1268         # FIXME: The regular expressions for expr_all_uppercase and expr_enum_end only accept integers
   1269         # and identifiers for the value of the enumerator, but do not accept any other constant
   1270         # expressions. However, this is sufficient for now (11/27/2012).
   1271         expr_all_uppercase = r'\s*[A-Z0-9_]+\s*(?:=\s*[a-zA-Z0-9]+\s*)?,?\s*$'
   1272         expr_starts_lowercase = r'\s*[a-z]'
   1273         expr_enum_end = r'}\s*(?:[a-zA-Z0-9]+\s*(?:=\s*[a-zA-Z0-9]+)?)?\s*;\s*'
   1274         expr_enum_start = r'\s*enum(?:\s+[a-zA-Z0-9]+)?\s*\{?\s*'
   1275         if self.in_enum_decl:
   1276             if match(r'\s*' + expr_enum_end + r'$', line):
   1277                 self.in_enum_decl = False
   1278                 self.is_webidl_enum = False
   1279             elif match(expr_all_uppercase, line):
   1280                 return self.is_webidl_enum
   1281             elif match(expr_starts_lowercase, line):
   1282                 return False
   1283         else:
   1284             matched = match(expr_enum_start + r'$', line)
   1285             if matched:
   1286                 self.in_enum_decl = True
   1287             else:
   1288                 matched = match(expr_enum_start + r'(?P<members>.*)' + expr_enum_end + r'$', line)
   1289                 if matched:
   1290                     members = matched.group('members').split(',')
   1291                     found_invalid_member = False
   1292                     for member in members:
   1293                         if match(expr_all_uppercase, member):
   1294                             found_invalid_member = not self.is_webidl_enum
   1295                         if match(expr_starts_lowercase, member):
   1296                             found_invalid_member = True
   1297                         if found_invalid_member:
   1298                             self.is_webidl_enum = False
   1299                             return False
   1300                     return True
   1301         return True
   1302 
   1303 def check_for_non_standard_constructs(clean_lines, line_number,
   1304                                       class_state, error):
   1305     """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   1306 
   1307     Complain about several constructs which gcc-2 accepts, but which are
   1308     not standard C++.  Warning about these in lint is one way to ease the
   1309     transition to new compilers.
   1310     - put storage class first (e.g. "static const" instead of "const static").
   1311     - "%lld" instead of %qd" in printf-type functions.
   1312     - "%1$d" is non-standard in printf-type functions.
   1313     - "\%" is an undefined character escape sequence.
   1314     - text after #endif is not allowed.
   1315     - invalid inner-style forward declaration.
   1316     - >? and <? operators, and their >?= and <?= cousins.
   1317     - classes with virtual methods need virtual destructors (compiler warning
   1318         available, but not turned on yet.)
   1319 
   1320     Additionally, check for constructor/destructor style violations as it
   1321     is very convenient to do so while checking for gcc-2 compliance.
   1322 
   1323     Args:
   1324       clean_lines: A CleansedLines instance containing the file.
   1325       line_number: The number of the line to check.
   1326       class_state: A _ClassState instance which maintains information about
   1327                    the current stack of nested class declarations being parsed.
   1328       error: A callable to which errors are reported, which takes parameters:
   1329              line number, error level, and message
   1330     """
   1331 
   1332     # Remove comments from the line, but leave in strings for now.
   1333     line = clean_lines.lines[line_number]
   1334 
   1335     if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   1336         error(line_number, 'runtime/printf_format', 3,
   1337               '%q in format strings is deprecated.  Use %ll instead.')
   1338 
   1339     if search(r'printf\s*\(.*".*%\d+\$', line):
   1340         error(line_number, 'runtime/printf_format', 2,
   1341               '%N$ formats are unconventional.  Try rewriting to avoid them.')
   1342 
   1343     # Remove escaped backslashes before looking for undefined escapes.
   1344     line = line.replace('\\\\', '')
   1345 
   1346     if search(r'("|\').*\\(%|\[|\(|{)', line):
   1347         error(line_number, 'build/printf_format', 3,
   1348               '%, [, (, and { are undefined character escapes.  Unescape them.')
   1349 
   1350     # For the rest, work with both comments and strings removed.
   1351     line = clean_lines.elided[line_number]
   1352 
   1353     if search(r'\b(const|volatile|void|char|short|int|long'
   1354               r'|float|double|signed|unsigned'
   1355               r'|schar|u?int8|u?int16|u?int32|u?int64)'
   1356               r'\s+(auto|register|static|extern|typedef)\b',
   1357               line):
   1358         error(line_number, 'build/storage_class', 5,
   1359               'Storage class (static, extern, typedef, etc) should be first.')
   1360 
   1361     if match(r'\s*#\s*endif\s*[^/\s]+', line):
   1362         error(line_number, 'build/endif_comment', 5,
   1363               'Uncommented text after #endif is non-standard.  Use a comment.')
   1364 
   1365     if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   1366         error(line_number, 'build/forward_decl', 5,
   1367               'Inner-style forward declarations are invalid.  Remove this line.')
   1368 
   1369     if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
   1370         error(line_number, 'build/deprecated', 3,
   1371               '>? and <? (max and min) operators are non-standard and deprecated.')
   1372 
   1373     # Track class entry and exit, and attempt to find cases within the
   1374     # class declaration that don't meet the C++ style
   1375     # guidelines. Tracking is very dependent on the code matching Google
   1376     # style guidelines, but it seems to perform well enough in testing
   1377     # to be a worthwhile addition to the checks.
   1378     classinfo_stack = class_state.classinfo_stack
   1379     # Look for a class declaration
   1380     class_decl_match = match(
   1381         r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
   1382     if class_decl_match:
   1383         classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))
   1384 
   1385     # Everything else in this function uses the top of the stack if it's
   1386     # not empty.
   1387     if not classinfo_stack:
   1388         return
   1389 
   1390     classinfo = classinfo_stack[-1]
   1391 
   1392     # If the opening brace hasn't been seen look for it and also
   1393     # parent class declarations.
   1394     if not classinfo.seen_open_brace:
   1395         # If the line has a ';' in it, assume it's a forward declaration or
   1396         # a single-line class declaration, which we won't process.
   1397         if line.find(';') != -1:
   1398             classinfo_stack.pop()
   1399             return
   1400         classinfo.seen_open_brace = (line.find('{') != -1)
   1401         # Look for a bare ':'
   1402         if search('(^|[^:]):($|[^:])', line):
   1403             classinfo.is_derived = True
   1404         if not classinfo.seen_open_brace:
   1405             return  # Everything else in this function is for after open brace
   1406 
   1407     # The class may have been declared with namespace or classname qualifiers.
   1408     # The constructor and destructor will not have those qualifiers.
   1409     base_classname = classinfo.name.split('::')[-1]
   1410 
   1411     # Look for single-argument constructors that aren't marked explicit.
   1412     # Technically a valid construct, but against style.
   1413     args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
   1414                  % re.escape(base_classname),
   1415                  line)
   1416     if (args
   1417         and args.group(1) != 'void'
   1418         and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
   1419                       args.group(1).strip())):
   1420         error(line_number, 'runtime/explicit', 5,
   1421               'Single-argument constructors should be marked explicit.')
   1422 
   1423     # Look for methods declared virtual.
   1424     if search(r'\bvirtual\b', line):
   1425         classinfo.virtual_method_line_number = line_number
   1426         # Only look for a destructor declaration on the same line. It would
   1427         # be extremely unlikely for the destructor declaration to occupy
   1428         # more than one line.
   1429         if search(r'~%s\s*\(' % base_classname, line):
   1430             classinfo.has_virtual_destructor = True
   1431 
   1432     # Look for class end.
   1433     brace_depth = classinfo.brace_depth
   1434     brace_depth = brace_depth + line.count('{') - line.count('}')
   1435     if brace_depth <= 0:
   1436         classinfo = classinfo_stack.pop()
   1437         # Try to detect missing virtual destructor declarations.
   1438         # For now, only warn if a non-derived class with virtual methods lacks
   1439         # a virtual destructor. This is to make it less likely that people will
   1440         # declare derived virtual destructors without declaring the base
   1441         # destructor virtual.
   1442         if ((classinfo.virtual_method_line_number is not None)
   1443             and (not classinfo.has_virtual_destructor)
   1444             and (not classinfo.is_derived)):  # Only warn for base classes
   1445             error(classinfo.line_number, 'runtime/virtual', 4,
   1446                   'The class %s probably needs a virtual destructor due to '
   1447                   'having virtual method(s), one declared at line %d.'
   1448                   % (classinfo.name, classinfo.virtual_method_line_number))
   1449         # Look for mixed bool and unsigned bitfields.
   1450         if (classinfo.bool_bitfields and classinfo.unsigned_bitfields):
   1451             bool_list = ', '.join(classinfo.bool_bitfields)
   1452             unsigned_list = ', '.join(classinfo.unsigned_bitfields)
   1453             error(classinfo.line_number, 'runtime/bitfields', 5,
   1454                   'The class %s contains mixed unsigned and bool bitfields, '
   1455                   'which will pack into separate words on the MSVC compiler.\n'
   1456                   'Bool bitfields are [%s].\nUnsigned bitfields are [%s].\n'
   1457                   'Consider converting bool bitfields to unsigned.'
   1458                   % (classinfo.name, bool_list, unsigned_list))
   1459     else:
   1460         classinfo.brace_depth = brace_depth
   1461 
   1462     well_typed_bitfield = False;
   1463     # Look for bool <name> : 1 declarations.
   1464     args = search(r'\bbool\s+(\S*)\s*:\s*\d+\s*;', line)
   1465     if args:
   1466         classinfo.bool_bitfields.append('%d: %s' % (line_number, args.group(1)))
   1467         well_typed_bitfield = True;
   1468 
   1469     # Look for unsigned <name> : n declarations.
   1470     args = search(r'\bunsigned\s+(?:int\s+)?(\S+)\s*:\s*\d+\s*;', line)
   1471     if args:
   1472         classinfo.unsigned_bitfields.append('%d: %s' % (line_number, args.group(1)))
   1473         well_typed_bitfield = True;
   1474 
   1475     # Look for other bitfield declarations. We don't care about those in
   1476     # size-matching structs.
   1477     if not (well_typed_bitfield or classinfo.name.startswith('SameSizeAs') or
   1478             classinfo.name.startswith('Expected')):
   1479         args = match(r'\s*(\S+)\s+(\S+)\s*:\s*\d+\s*;', line)
   1480         if args:
   1481             error(line_number, 'runtime/bitfields', 4,
   1482                   'Member %s of class %s defined as a bitfield of type %s. '
   1483                   'Please declare all bitfields as unsigned.'
   1484                   % (args.group(2), classinfo.name, args.group(1)))
   1485 
   1486 def check_spacing_for_function_call(line, line_number, error):
   1487     """Checks for the correctness of various spacing around function calls.
   1488 
   1489     Args:
   1490       line: The text of the line to check.
   1491       line_number: The number of the line to check.
   1492       error: The function to call with any errors found.
   1493     """
   1494 
   1495     # Since function calls often occur inside if/for/foreach/while/switch
   1496     # expressions - which have their own, more liberal conventions - we
   1497     # first see if we should be looking inside such an expression for a
   1498     # function call, to which we can apply more strict standards.
   1499     function_call = line    # if there's no control flow construct, look at whole line
   1500     for pattern in (r'\bif\s*\((.*)\)\s*{',
   1501                     r'\bfor\s*\((.*)\)\s*{',
   1502                     r'\bforeach\s*\((.*)\)\s*{',
   1503                     r'\bwhile\s*\((.*)\)\s*[{;]',
   1504                     r'\bswitch\s*\((.*)\)\s*{'):
   1505         matched = search(pattern, line)
   1506         if matched:
   1507             function_call = matched.group(1)    # look inside the parens for function calls
   1508             break
   1509 
   1510     # Except in if/for/foreach/while/switch, there should never be space
   1511     # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1512     # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1513     # a space before a ( when it's a function argument.  I assume it's a
   1514     # function argument when the char before the whitespace is legal in
   1515     # a function name (alnum + _) and we're not starting a macro. Also ignore
   1516     # pointers and references to arrays and functions coz they're too tricky:
   1517     # we use a very simple way to recognize these:
   1518     # " (something)(maybe-something)" or
   1519     # " (something)(maybe-something," or
   1520     # " (something)[something]"
   1521     # Note that we assume the contents of [] to be short enough that
   1522     # they'll never need to wrap.
   1523     if (  # Ignore control structures.
   1524         not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
   1525         # Ignore pointers/references to functions.
   1526         and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
   1527         # Ignore pointers/references to arrays.
   1528         and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
   1529         if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):      # a ( used for a fn call
   1530             error(line_number, 'whitespace/parens', 4,
   1531                   'Extra space after ( in function call')
   1532         elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
   1533             error(line_number, 'whitespace/parens', 2,
   1534                   'Extra space after (')
   1535         if (search(r'\w\s+\(', function_call)
   1536             and not match(r'\s*(#|typedef)', function_call)):
   1537             error(line_number, 'whitespace/parens', 4,
   1538                   'Extra space before ( in function call')
   1539         # If the ) is followed only by a newline or a { + newline, assume it's
   1540         # part of a control statement (if/while/etc), and don't complain
   1541         if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
   1542             error(line_number, 'whitespace/parens', 2,
   1543                   'Extra space before )')
   1544 
   1545 
   1546 def is_blank_line(line):
   1547     """Returns true if the given line is blank.
   1548 
   1549     We consider a line to be blank if the line is empty or consists of
   1550     only white spaces.
   1551 
   1552     Args:
   1553       line: A line of a string.
   1554 
   1555     Returns:
   1556       True, if the given line is blank.
   1557     """
   1558     return not line or line.isspace()
   1559 
   1560 
   1561 def detect_functions(clean_lines, line_number, function_state, error):
   1562     """Finds where functions start and end.
   1563 
   1564     Uses a simplistic algorithm assuming other style guidelines
   1565     (especially spacing) are followed.
   1566     Trivial bodies are unchecked, so constructors with huge initializer lists
   1567     may be missed.
   1568 
   1569     Args:
   1570       clean_lines: A CleansedLines instance containing the file.
   1571       line_number: The number of the line to check.
   1572       function_state: Current function name and lines in body so far.
   1573       error: The function to call with any errors found.
   1574     """
   1575     # Are we now past the end of a function?
   1576     if function_state.end_position.row + 1 == line_number:
   1577         function_state.end()
   1578 
   1579     # If we're in a function, don't try to detect a new one.
   1580     if function_state.in_a_function:
   1581         return
   1582 
   1583     lines = clean_lines.lines
   1584     line = lines[line_number]
   1585     raw = clean_lines.raw_lines
   1586     raw_line = raw[line_number]
   1587 
   1588     # Lines ending with a \ indicate a macro. Don't try to check them.
   1589     if raw_line.endswith('\\'):
   1590         return
   1591 
   1592     regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\('  # decls * & space::name( ...
   1593     match_result = match(regexp, line)
   1594     if not match_result:
   1595         return
   1596 
   1597     # If the name is all caps and underscores, figure it's a macro and
   1598     # ignore it, unless it's TEST or TEST_F.
   1599     function_name = match_result.group(1).split()[-1]
   1600     if function_name != 'TEST' and function_name != 'TEST_F' and match(r'[A-Z_]+$', function_name):
   1601         return
   1602 
   1603     joined_line = ''
   1604     for start_line_number in xrange(line_number, clean_lines.num_lines()):
   1605         start_line = clean_lines.elided[start_line_number]
   1606         joined_line += ' ' + start_line.lstrip()
   1607         body_match = search(r'{|;', start_line)
   1608         if body_match:
   1609             body_start_position = Position(start_line_number, body_match.start(0))
   1610 
   1611             # Replace template constructs with _ so that no spaces remain in the function name,
   1612             # while keeping the column numbers of other characters the same as "line".
   1613             line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line)
   1614             match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates)
   1615             if not match_function:
   1616                 return  # The '(' must have been inside of a template.
   1617 
   1618             # Use the column numbers from the modified line to find the
   1619             # function name in the original line.
   1620             function = line[match_function.start(1):match_function.end(1)]
   1621             function_name_start_position = Position(line_number, match_function.start(1))
   1622 
   1623             if match(r'TEST', function):    # Handle TEST... macros
   1624                 parameter_regexp = search(r'(\(.*\))', joined_line)
   1625                 if parameter_regexp:             # Ignore bad syntax
   1626                     function += parameter_regexp.group(1)
   1627             else:
   1628                 function += '()'
   1629 
   1630             parameter_start_position = Position(line_number, match_function.end(1))
   1631             parameter_end_position = close_expression(clean_lines.elided, parameter_start_position)
   1632             if parameter_end_position.row == len(clean_lines.elided):
   1633                 # No end was found.
   1634                 return
   1635 
   1636             if start_line[body_start_position.column] == ';':
   1637                 end_position = Position(body_start_position.row, body_start_position.column + 1)
   1638             else:
   1639                 end_position = close_expression(clean_lines.elided, body_start_position)
   1640 
   1641             # Check for nonsensical positions. (This happens in test cases which check code snippets.)
   1642             if parameter_end_position > body_start_position:
   1643                 return
   1644 
   1645             function_state.begin(function, function_name_start_position, body_start_position, end_position,
   1646                                  parameter_start_position, parameter_end_position, clean_lines)
   1647             return
   1648 
   1649     # No body for the function (or evidence of a non-function) was found.
   1650     error(line_number, 'readability/fn_size', 5,
   1651           'Lint failed to find start of function body.')
   1652 
   1653 
   1654 def check_for_function_lengths(clean_lines, line_number, function_state, error):
   1655     """Reports for long function bodies.
   1656 
   1657     For an overview why this is done, see:
   1658     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1659 
   1660     Blank/comment lines are not counted so as to avoid encouraging the removal
   1661     of vertical space and commments just to get through a lint check.
   1662     NOLINT *on the last line of a function* disables this check.
   1663 
   1664     Args:
   1665       clean_lines: A CleansedLines instance containing the file.
   1666       line_number: The number of the line to check.
   1667       function_state: Current function name and lines in body so far.
   1668       error: The function to call with any errors found.
   1669     """
   1670     lines = clean_lines.lines
   1671     line = lines[line_number]
   1672     raw = clean_lines.raw_lines
   1673     raw_line = raw[line_number]
   1674 
   1675     if function_state.end_position.row == line_number:  # last line
   1676         if not search(r'\bNOLINT\b', raw_line):
   1677             function_state.check(error, line_number)
   1678     elif not match(r'^\s*$', line):
   1679         function_state.count(line_number)  # Count non-blank/non-comment lines.
   1680 
   1681 
   1682 def _check_parameter_name_against_text(parameter, text, error):
   1683     """Checks to see if the parameter name is contained within the text.
   1684 
   1685     Return false if the check failed (i.e. an error was produced).
   1686     """
   1687 
   1688     # Treat 'lower with underscores' as a canonical form because it is
   1689     # case insensitive while still retaining word breaks. (This ensures that
   1690     # 'elate' doesn't look like it is duplicating of 'NateLate'.)
   1691     canonical_parameter_name = parameter.lower_with_underscores_name()
   1692 
   1693     # Appends "object" to all text to catch variables that did the same (but only
   1694     # do this when the parameter name is more than a single character to avoid
   1695     # flagging 'b' which may be an ok variable when used in an rgba function).
   1696     if len(canonical_parameter_name) > 1:
   1697         text = sub(r'(\w)\b', r'\1Object', text)
   1698     canonical_text = _convert_to_lower_with_underscores(text)
   1699 
   1700     # Used to detect cases like ec for ExceptionCode.
   1701     acronym = _create_acronym(text).lower()
   1702     if canonical_text.find(canonical_parameter_name) != -1 or acronym.find(canonical_parameter_name) != -1:
   1703         error(parameter.row, 'readability/parameter_name', 5,
   1704               'The parameter name "%s" adds no information, so it should be removed.' % parameter.name)
   1705         return False
   1706     return True
   1707 
   1708 
   1709 def check_function_definition_and_pass_ptr(type_text, row, location_description, error):
   1710     """Check that function definitions for use Pass*Ptr instead of *Ptr.
   1711 
   1712     Args:
   1713        type_text: A string containing the type. (For return values, it may contain more than the type.)
   1714        row: The row number of the type.
   1715        location_description: Used to indicate where the type is. This is either 'parameter' or 'return'.
   1716        error: The function to call with any errors found.
   1717     """
   1718     match_ref_or_own_ptr = '(?=\W|^)(Ref|Own)Ptr(?=\W)'
   1719     exceptions = '(?:&|\*|\*\s*=\s*0)$'
   1720     bad_type_usage = search(match_ref_or_own_ptr, type_text)
   1721     exception_usage = search(exceptions, type_text)
   1722     if not bad_type_usage or exception_usage:
   1723         return
   1724     type_name = bad_type_usage.group(0)
   1725     error(row, 'readability/pass_ptr', 5,
   1726           'The %s type should use Pass%s instead of %s.' % (location_description, type_name, type_name))
   1727 
   1728 
   1729 def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error):
   1730     """Check that function definitions for style issues.
   1731 
   1732     Specifically, check that parameter names in declarations add information.
   1733 
   1734     Args:
   1735        filename: Filename of the file that is being processed.
   1736        file_extension: The current file extension, without the leading dot.
   1737        clean_lines: A CleansedLines instance containing the file.
   1738        line_number: The number of the line to check.
   1739        function_state: Current function name and lines in body so far.
   1740        error: The function to call with any errors found.
   1741     """
   1742     if line_number != function_state.body_start_position.row:
   1743         return
   1744 
   1745     modifiers_and_return_type = function_state.modifiers_and_return_type()
   1746     if filename.find('/chromium/') != -1 and search(r'\bWEBKIT_EXPORT\b', modifiers_and_return_type):
   1747         if filename.find('/chromium/public/') == -1 and filename.find('/chromium/tests/') == -1 and filename.find('chromium/platform') == -1:
   1748             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1749                   'WEBKIT_EXPORT should only appear in the chromium public (or tests) directory.')
   1750         elif not file_extension == "h":
   1751             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1752                   'WEBKIT_EXPORT should only be used in header files.')
   1753         elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type):
   1754             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1755                   'WEBKIT_EXPORT should not be used on a function with a body.')
   1756         elif function_state.is_pure:
   1757             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1758                   'WEBKIT_EXPORT should not be used with a pure virtual function.')
   1759 
   1760     check_function_definition_and_pass_ptr(modifiers_and_return_type, function_state.function_name_start_position.row, 'return', error)
   1761 
   1762     parameter_list = function_state.parameter_list()
   1763     for parameter in parameter_list:
   1764         check_function_definition_and_pass_ptr(parameter.type, parameter.row, 'parameter', error)
   1765 
   1766         # Do checks specific to function declarations and parameter names.
   1767         if not function_state.is_declaration or not parameter.name:
   1768             continue
   1769 
   1770         # Check the parameter name against the function name for single parameter set functions.
   1771         if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function):
   1772             trimmed_function_name = function_state.current_function[len('set'):]
   1773             if not _check_parameter_name_against_text(parameter, trimmed_function_name, error):
   1774                 continue  # Since an error was noted for this name, move to the next parameter.
   1775 
   1776         # Check the parameter name against the type.
   1777         if not _check_parameter_name_against_text(parameter, parameter.type, error):
   1778             continue  # Since an error was noted for this name, move to the next parameter.
   1779 
   1780 
   1781 def check_pass_ptr_usage(clean_lines, line_number, function_state, error):
   1782     """Check for proper usage of Pass*Ptr.
   1783 
   1784     Currently this is limited to detecting declarations of Pass*Ptr
   1785     variables inside of functions.
   1786 
   1787     Args:
   1788       clean_lines: A CleansedLines instance containing the file.
   1789       line_number: The number of the line to check.
   1790       function_state: Current function name and lines in body so far.
   1791       error: The function to call with any errors found.
   1792     """
   1793     if not function_state.in_a_function:
   1794         return
   1795 
   1796     lines = clean_lines.lines
   1797     line = lines[line_number]
   1798     if line_number > function_state.body_start_position.row:
   1799         matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line)
   1800         if matched_pass_ptr:
   1801             type_name = 'Pass%sPtr' % matched_pass_ptr.group(1)
   1802             error(line_number, 'readability/pass_ptr', 5,
   1803                   'Local variables should never be %s (see '
   1804                   'http://webkit.org/coding/RefPtr.html).' % type_name)
   1805 
   1806 
   1807 def check_for_leaky_patterns(clean_lines, line_number, function_state, error):
   1808     """Check for constructs known to be leak prone.
   1809     Args:
   1810       clean_lines: A CleansedLines instance containing the file.
   1811       line_number: The number of the line to check.
   1812       function_state: Current function name and lines in body so far.
   1813       error: The function to call with any errors found.
   1814     """
   1815     lines = clean_lines.lines
   1816     line = lines[line_number]
   1817 
   1818     matched_get_dc = search(r'\b(?P<function_name>GetDC(Ex)?)\s*\(', line)
   1819     if matched_get_dc:
   1820         error(line_number, 'runtime/leaky_pattern', 5,
   1821               'Use the class HWndDC instead of calling %s to avoid potential '
   1822               'memory leaks.' % matched_get_dc.group('function_name'))
   1823 
   1824     matched_create_dc = search(r'\b(?P<function_name>Create(Compatible)?DC)\s*\(', line)
   1825     matched_own_dc = search(r'\badoptPtr\b', line)
   1826     if matched_create_dc and not matched_own_dc:
   1827         error(line_number, 'runtime/leaky_pattern', 5,
   1828               'Use adoptPtr and OwnPtr<HDC> when calling %s to avoid potential '
   1829               'memory leaks.' % matched_create_dc.group('function_name'))
   1830 
   1831 
   1832 def check_spacing(file_extension, clean_lines, line_number, error):
   1833     """Checks for the correctness of various spacing issues in the code.
   1834 
   1835     Things we check for: spaces around operators, spaces after
   1836     if/for/while/switch, no spaces around parens in function calls, two
   1837     spaces between code and comment, don't start a block with a blank
   1838     line, don't end a function with a blank line, don't have too many
   1839     blank lines in a row.
   1840 
   1841     Args:
   1842       file_extension: The current file extension, without the leading dot.
   1843       clean_lines: A CleansedLines instance containing the file.
   1844       line_number: The number of the line to check.
   1845       error: The function to call with any errors found.
   1846     """
   1847 
   1848     raw = clean_lines.raw_lines
   1849     line = raw[line_number]
   1850 
   1851     # Before nixing comments, check if the line is blank for no good
   1852     # reason.  This includes the first line after a block is opened, and
   1853     # blank lines at the end of a function (ie, right before a line like '}').
   1854     if is_blank_line(line):
   1855         elided = clean_lines.elided
   1856         previous_line = elided[line_number - 1]
   1857         previous_brace = previous_line.rfind('{')
   1858         # FIXME: Don't complain if line before blank line, and line after,
   1859         #        both start with alnums and are indented the same amount.
   1860         #        This ignores whitespace at the start of a namespace block
   1861         #        because those are not usually indented.
   1862         if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
   1863             and previous_line[:previous_brace].find('namespace') == -1):
   1864             # OK, we have a blank line at the start of a code block.  Before we
   1865             # complain, we check if it is an exception to the rule: The previous
   1866             # non-empty line has the parameters of a function header that are indented
   1867             # 4 spaces (because they did not fit in a 80 column line when placed on
   1868             # the same line as the function name).  We also check for the case where
   1869             # the previous line is indented 6 spaces, which may happen when the
   1870             # initializers of a constructor do not fit into a 80 column line.
   1871             exception = False
   1872             if match(r' {6}\w', previous_line):  # Initializer list?
   1873                 # We are looking for the opening column of initializer list, which
   1874                 # should be indented 4 spaces to cause 6 space indentation afterwards.
   1875                 search_position = line_number - 2
   1876                 while (search_position >= 0
   1877                        and match(r' {6}\w', elided[search_position])):
   1878                     search_position -= 1
   1879                 exception = (search_position >= 0
   1880                              and elided[search_position][:5] == '    :')
   1881             else:
   1882                 # Search for the function arguments or an initializer list.  We use a
   1883                 # simple heuristic here: If the line is indented 4 spaces; and we have a
   1884                 # closing paren, without the opening paren, followed by an opening brace
   1885                 # or colon (for initializer lists) we assume that it is the last line of
   1886                 # a function header.  If we have a colon indented 4 spaces, it is an
   1887                 # initializer list.
   1888                 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   1889                                    previous_line)
   1890                              or match(r' {4}:', previous_line))
   1891 
   1892             if not exception:
   1893                 error(line_number, 'whitespace/blank_line', 2,
   1894                       'Blank line at the start of a code block.  Is this needed?')
   1895         # This doesn't ignore whitespace at the end of a namespace block
   1896         # because that is too hard without pairing open/close braces;
   1897         # however, a special exception is made for namespace closing
   1898         # brackets which have a comment containing "namespace".
   1899         #
   1900         # Also, ignore blank lines at the end of a block in a long if-else
   1901         # chain, like this:
   1902         #   if (condition1) {
   1903         #     // Something followed by a blank line
   1904         #
   1905         #   } else if (condition2) {
   1906         #     // Something else
   1907         #   }
   1908         if line_number + 1 < clean_lines.num_lines():
   1909             next_line = raw[line_number + 1]
   1910             if (next_line
   1911                 and match(r'\s*}', next_line)
   1912                 and next_line.find('namespace') == -1
   1913                 and next_line.find('} else ') == -1):
   1914                 error(line_number, 'whitespace/blank_line', 3,
   1915                       'Blank line at the end of a code block.  Is this needed?')
   1916 
   1917     # Next, we check for proper spacing with respect to comments.
   1918     comment_position = line.find('//')
   1919     if comment_position != -1:
   1920         # Check if the // may be in quotes.  If so, ignore it
   1921         # Comparisons made explicit for clarity
   1922         if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
   1923             # Allow one space before end of line comment.
   1924             if (not match(r'^\s*$', line[:comment_position])
   1925                 and (comment_position >= 1
   1926                 and ((line[comment_position - 1] not in string.whitespace)
   1927                      or (comment_position >= 2
   1928                          and line[comment_position - 2] in string.whitespace)))):
   1929                 error(line_number, 'whitespace/comments', 5,
   1930                       'One space before end of line comments')
   1931             # There should always be a space between the // and the comment
   1932             commentend = comment_position + 2
   1933             if commentend < len(line) and not line[commentend] == ' ':
   1934                 # but some lines are exceptions -- e.g. if they're big
   1935                 # comment delimiters like:
   1936                 # //----------------------------------------------------------
   1937                 # or they begin with multiple slashes followed by a space:
   1938                 # //////// Header comment
   1939                 matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
   1940                            or search(r'^/+ ', line[commentend:]))
   1941                 if not matched:
   1942                     error(line_number, 'whitespace/comments', 4,
   1943                           'Should have a space between // and comment')
   1944 
   1945             # There should only be one space after punctuation in a comment.
   1946             if search(r'[.!?,;:]\s\s+\w', line[comment_position:]):
   1947                 error(line_number, 'whitespace/comments', 5,
   1948                       'Should have only a single space after a punctuation in a comment.')
   1949 
   1950     line = clean_lines.elided[line_number]  # get rid of comments and strings
   1951 
   1952     # Don't try to do spacing checks for operator methods
   1953     line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=|/)\(', 'operator\(', line)
   1954     # Don't try to do spacing checks for #include or #import statements at
   1955     # minimum because it messes up checks for spacing around /
   1956     if match(r'\s*#\s*(?:include|import)', line):
   1957         return
   1958     if search(r'[\w.]=[\w.]', line):
   1959         error(line_number, 'whitespace/operators', 4,
   1960               'Missing spaces around =')
   1961 
   1962     # FIXME: It's not ok to have spaces around binary operators like .
   1963 
   1964     # You should always have whitespace around binary operators.
   1965     # Alas, we can't test < or > because they're legitimately used sans spaces
   1966     # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
   1967     # only if it's not template params list spilling into the next line.
   1968     matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
   1969     if not matched:
   1970         # Note that while it seems that the '<[^<]*' term in the following
   1971         # regexp could be simplified to '<.*', which would indeed match
   1972         # the same class of strings, the [^<] means that searching for the
   1973         # regexp takes linear rather than quadratic time.
   1974         if not search(r'<[^<]*,\s*$', line):  # template params spill
   1975             matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
   1976     if matched:
   1977         error(line_number, 'whitespace/operators', 3,
   1978               'Missing spaces around %s' % matched.group(1))
   1979 
   1980     # There shouldn't be space around unary operators
   1981     matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   1982     if matched:
   1983         error(line_number, 'whitespace/operators', 4,
   1984               'Extra space for operator %s' % matched.group(1))
   1985 
   1986     # A pet peeve of mine: no spaces after an if, while, switch, or for
   1987     matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
   1988     if matched:
   1989         error(line_number, 'whitespace/parens', 5,
   1990               'Missing space before ( in %s' % matched.group(1))
   1991 
   1992     # For if/for/foreach/while/switch, the left and right parens should be
   1993     # consistent about how many spaces are inside the parens, and
   1994     # there should either be zero or one spaces inside the parens.
   1995     # We don't want: "if ( foo)" or "if ( foo   )".
   1996     # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   1997     matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
   1998     if matched:
   1999         statement = matched.group('statement')
   2000         condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
   2001         if condition is not None:
   2002             condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
   2003             if condition_match:
   2004                 n_leading = len(condition_match.group('leading'))
   2005                 n_trailing = len(condition_match.group('trailing'))
   2006                 if n_leading != 0:
   2007                     for_exception = statement == 'for' and condition.startswith(' ;')
   2008                     if not for_exception:
   2009                         error(line_number, 'whitespace/parens', 5,
   2010                               'Extra space after ( in %s' % statement)
   2011                 if n_trailing != 0:
   2012                     for_exception = statement == 'for' and condition.endswith('; ')
   2013                     if not for_exception:
   2014                         error(line_number, 'whitespace/parens', 5,
   2015                               'Extra space before ) in %s' % statement)
   2016 
   2017             # Do not check for more than one command in macros
   2018             in_preprocessor_directive = match(r'\s*#', line)
   2019             if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
   2020                 error(line_number, 'whitespace/parens', 4,
   2021                       'More than one command on the same line in %s' % statement)
   2022 
   2023     # You should always have a space after a comma (either as fn arg or operator)
   2024     if search(r',[^\s]', line):
   2025         error(line_number, 'whitespace/comma', 3,
   2026               'Missing space after ,')
   2027 
   2028     matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
   2029     if matched:
   2030         error(line_number, 'whitespace/declaration', 3,
   2031               'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))
   2032 
   2033     if file_extension == 'cpp':
   2034         # C++ should have the & or * beside the type not the variable name.
   2035         matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
   2036         if matched:
   2037             error(line_number, 'whitespace/declaration', 3,
   2038                   'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
   2039 
   2040     elif file_extension == 'c':
   2041         # C Pointer declaration should have the * beside the variable not the type name.
   2042         matched = search(r'^\s*\w+\*\s+\w+', line)
   2043         if matched:
   2044             error(line_number, 'whitespace/declaration', 3,
   2045                   'Declaration has space between * and variable name in %s' % matched.group(0).strip())
   2046 
   2047     # Next we will look for issues with function calls.
   2048     check_spacing_for_function_call(line, line_number, error)
   2049 
   2050     # Except after an opening paren, you should have spaces before your braces.
   2051     # And since you should never have braces at the beginning of a line, this is
   2052     # an easy test.
   2053     if search(r'[^ ({]{', line):
   2054         error(line_number, 'whitespace/braces', 5,
   2055               'Missing space before {')
   2056 
   2057     # Make sure '} else {' has spaces.
   2058     if search(r'}else', line):
   2059         error(line_number, 'whitespace/braces', 5,
   2060               'Missing space before else')
   2061 
   2062     # You shouldn't have spaces before your brackets, except maybe after
   2063     # 'delete []' or 'new char * []'.
   2064     if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
   2065         error(line_number, 'whitespace/braces', 5,
   2066               'Extra space before [')
   2067 
   2068     # There should always be a single space in between braces on the same line.
   2069     if search(r'\{\}', line):
   2070         error(line_number, 'whitespace/braces', 5, 'Missing space inside { }.')
   2071     if search(r'\{\s\s+\}', line):
   2072         error(line_number, 'whitespace/braces', 5, 'Too many spaces inside { }.')
   2073 
   2074     # You shouldn't have a space before a semicolon at the end of the line.
   2075     # There's a special case for "for" since the style guide allows space before
   2076     # the semicolon there.
   2077     if search(r':\s*;\s*$', line):
   2078         error(line_number, 'whitespace/semicolon', 5,
   2079               'Semicolon defining empty statement. Use { } instead.')
   2080     elif search(r'^\s*;\s*$', line):
   2081         error(line_number, 'whitespace/semicolon', 5,
   2082               'Line contains only semicolon. If this should be an empty statement, '
   2083               'use { } instead.')
   2084     elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
   2085         error(line_number, 'whitespace/semicolon', 5,
   2086               'Extra space before last semicolon. If this should be an empty '
   2087               'statement, use { } instead.')
   2088     elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
   2089           and line.count('(') == line.count(')')
   2090           # Allow do {} while();
   2091           and not search(r'}\s*while', line)):
   2092         error(line_number, 'whitespace/semicolon', 5,
   2093               'Semicolon defining empty statement for this loop. Use { } instead.')
   2094 
   2095 
   2096 def get_previous_non_blank_line(clean_lines, line_number):
   2097     """Return the most recent non-blank line and its line number.
   2098 
   2099     Args:
   2100       clean_lines: A CleansedLines instance containing the file contents.
   2101       line_number: The number of the line to check.
   2102 
   2103     Returns:
   2104       A tuple with two elements.  The first element is the contents of the last
   2105       non-blank line before the current line, or the empty string if this is the
   2106       first non-blank line.  The second is the line number of that line, or -1
   2107       if this is the first non-blank line.
   2108     """
   2109 
   2110     previous_line_number = line_number - 1
   2111     while previous_line_number >= 0:
   2112         previous_line = clean_lines.elided[previous_line_number]
   2113         if not is_blank_line(previous_line):     # if not a blank line...
   2114             return (previous_line, previous_line_number)
   2115         previous_line_number -= 1
   2116     return ('', -1)
   2117 
   2118 
   2119 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
   2120     """Looks for indentation errors inside of namespaces.
   2121 
   2122     Args:
   2123       clean_lines: A CleansedLines instance containing the file.
   2124       line_number: The number of the line to check.
   2125       file_extension: The extension (dot not included) of the file.
   2126       file_state: A _FileState instance which maintains information about
   2127                   the state of things in the file.
   2128       error: The function to call with any errors found.
   2129     """
   2130 
   2131     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2132 
   2133     namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
   2134     if not namespace_match:
   2135         return
   2136 
   2137     current_indentation_level = len(namespace_match.group('namespace_indentation'))
   2138     if current_indentation_level > 0:
   2139         # Don't warn about an indented namespace if we already warned about indented code.
   2140         if not file_state.did_inside_namespace_indent_warning():
   2141             error(line_number, 'whitespace/indent', 4,
   2142                   'namespace should never be indented.')
   2143         return
   2144     looking_for_semicolon = False;
   2145     line_offset = 0
   2146     in_preprocessor_directive = False;
   2147     for current_line in clean_lines.elided[line_number + 1:]:
   2148         line_offset += 1
   2149         if not current_line.strip():
   2150             continue
   2151         if not current_indentation_level:
   2152             if not (in_preprocessor_directive or looking_for_semicolon):
   2153                 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
   2154                     file_state.set_did_inside_namespace_indent_warning()
   2155                     error(line_number + line_offset, 'whitespace/indent', 4,
   2156                           'Code inside a namespace should not be indented.')
   2157             if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax.
   2158                 in_preprocessor_directive = current_line[-1] == '\\'
   2159             else:
   2160                 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\')
   2161         else:
   2162             looking_for_semicolon = False; # If we have a brace we may not need a semicolon.
   2163         current_indentation_level += current_line.count('{') - current_line.count('}')
   2164         if current_indentation_level < 0:
   2165             break;
   2166 
   2167 
   2168 def check_enum_casing(clean_lines, line_number, enum_state, error):
   2169     """Looks for incorrectly named enum values.
   2170 
   2171     Args:
   2172       clean_lines: A CleansedLines instance containing the file.
   2173       line_number: The number of the line to check.
   2174       enum_state: A _EnumState instance which maintains enum declaration state.
   2175       error: The function to call with any errors found.
   2176     """
   2177 
   2178     enum_state.is_webidl_enum |= bool(match(r'\s*// Web(?:Kit)?IDL enum\s*$', clean_lines.raw_lines[line_number]))
   2179 
   2180     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2181     if not enum_state.process_clean_line(line):
   2182         error(line_number, 'readability/enum_casing', 4,
   2183               'enum members should use InterCaps with an initial capital letter.')
   2184 
   2185 def check_directive_indentation(clean_lines, line_number, file_state, error):
   2186     """Looks for indentation of preprocessor directives.
   2187 
   2188     Args:
   2189       clean_lines: A CleansedLines instance containing the file.
   2190       line_number: The number of the line to check.
   2191       file_state: A _FileState instance which maintains information about
   2192                   the state of things in the file.
   2193       error: The function to call with any errors found.
   2194     """
   2195 
   2196     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2197 
   2198     indented_preprocessor_directives = match(r'\s+#', line)
   2199     if not indented_preprocessor_directives:
   2200         return
   2201 
   2202     error(line_number, 'whitespace/indent', 4, 'preprocessor directives (e.g., #ifdef, #define, #import) should never be indented.')
   2203 
   2204 
   2205 def get_initial_spaces_for_line(clean_line):
   2206     initial_spaces = 0
   2207     while initial_spaces < len(clean_line) and clean_line[initial_spaces] == ' ':
   2208         initial_spaces += 1
   2209     return initial_spaces
   2210 
   2211 
   2212 def check_indentation_amount(clean_lines, line_number, error):
   2213     line = clean_lines.elided[line_number]
   2214     initial_spaces = get_initial_spaces_for_line(line)
   2215 
   2216     if initial_spaces % 4:
   2217         error(line_number, 'whitespace/indent', 3,
   2218               'Weird number of spaces at line-start.  Are you using a 4-space indent?')
   2219         return
   2220 
   2221     previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2222     if not previous_line.strip() or match(r'\s*\w+\s*:\s*$', previous_line) or previous_line[0] == '#':
   2223         return
   2224 
   2225     previous_line_initial_spaces = get_initial_spaces_for_line(previous_line)
   2226     if initial_spaces > previous_line_initial_spaces + 4:
   2227         error(line_number, 'whitespace/indent', 3, 'When wrapping a line, only indent 4 spaces.')
   2228 
   2229 
   2230 def check_using_std(clean_lines, line_number, file_state, error):
   2231     """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'.
   2232 
   2233     Args:
   2234       clean_lines: A CleansedLines instance containing the file.
   2235       line_number: The number of the line to check.
   2236       file_state: A _FileState instance which maintains information about
   2237                   the state of things in the file.
   2238       error: The function to call with any errors found.
   2239     """
   2240 
   2241     # This check doesn't apply to C or Objective-C implementation files.
   2242     if file_state.is_c_or_objective_c():
   2243         return
   2244 
   2245     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2246 
   2247     using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line)
   2248     if not using_std_match:
   2249         return
   2250 
   2251     method_name = using_std_match.group('method_name')
   2252     # Exception for the established idiom for swapping objects in generic code.
   2253     if method_name == 'swap':
   2254         return
   2255     error(line_number, 'build/using_std', 4,
   2256           "Use 'using namespace std;' instead of 'using std::%s;'." % method_name)
   2257 
   2258 
   2259 def check_max_min_macros(clean_lines, line_number, file_state, error):
   2260     """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min().
   2261 
   2262     Args:
   2263       clean_lines: A CleansedLines instance containing the file.
   2264       line_number: The number of the line to check.
   2265       file_state: A _FileState instance which maintains information about
   2266                   the state of things in the file.
   2267       error: The function to call with any errors found.
   2268     """
   2269 
   2270     # This check doesn't apply to C or Objective-C implementation files.
   2271     if file_state.is_c_or_objective_c():
   2272         return
   2273 
   2274     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2275 
   2276     max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line)
   2277     if not max_min_macros_search:
   2278         return
   2279 
   2280     max_min_macro = max_min_macros_search.group('max_min_macro')
   2281     max_min_macro_lower = max_min_macro.lower()
   2282     error(line_number, 'runtime/max_min_macros', 4,
   2283           'Use std::%s() or std::%s<type>() instead of the %s() macro.'
   2284           % (max_min_macro_lower, max_min_macro_lower, max_min_macro))
   2285 
   2286 
   2287 def check_ctype_functions(clean_lines, line_number, file_state, error):
   2288     """Looks for use of the standard functions in ctype.h and suggest they be replaced
   2289        by use of equivilent ones in <wtf/ASCIICType.h>?.
   2290 
   2291     Args:
   2292       clean_lines: A CleansedLines instance containing the file.
   2293       line_number: The number of the line to check.
   2294       file_state: A _FileState instance which maintains information about
   2295                   the state of things in the file.
   2296       error: The function to call with any errors found.
   2297     """
   2298 
   2299     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2300 
   2301     ctype_function_search = search(r'\b(?P<ctype_function>(isalnum|isalpha|isascii|isblank|iscntrl|isdigit|isgraph|islower|isprint|ispunct|isspace|isupper|isxdigit|toascii|tolower|toupper))\s*\(', line)
   2302     if not ctype_function_search:
   2303         return
   2304 
   2305     ctype_function = ctype_function_search.group('ctype_function')
   2306     error(line_number, 'runtime/ctype_function', 4,
   2307           'Use equivelent function in <wtf/ASCIICType.h> instead of the %s() function.'
   2308           % (ctype_function))
   2309 
   2310 def check_switch_indentation(clean_lines, line_number, error):
   2311     """Looks for indentation errors inside of switch statements.
   2312 
   2313     Args:
   2314       clean_lines: A CleansedLines instance containing the file.
   2315       line_number: The number of the line to check.
   2316       error: The function to call with any errors found.
   2317     """
   2318 
   2319     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2320 
   2321     switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
   2322     if not switch_match:
   2323         return
   2324 
   2325     switch_indentation = switch_match.group('switch_indentation')
   2326     inner_indentation = switch_indentation + ' ' * 4
   2327     line_offset = 0
   2328     encountered_nested_switch = False
   2329 
   2330     for current_line in clean_lines.elided[line_number + 1:]:
   2331         line_offset += 1
   2332 
   2333         # Skip not only empty lines but also those with preprocessor directives.
   2334         if current_line.strip() == '' or current_line.startswith('#'):
   2335             continue
   2336 
   2337         if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line):
   2338             # Complexity alarm - another switch statement nested inside the one
   2339             # that we're currently testing. We'll need to track the extent of
   2340             # that inner switch if the upcoming label tests are still supposed
   2341             # to work correctly. Let's not do that; instead, we'll finish
   2342             # checking this line, and then leave it like that. Assuming the
   2343             # indentation is done consistently (even if incorrectly), this will
   2344             # still catch all indentation issues in practice.
   2345             encountered_nested_switch = True
   2346 
   2347         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2348         current_indentation = current_indentation_match.group('indentation')
   2349         remaining_line = current_indentation_match.group('remaining_line')
   2350 
   2351         # End the check at the end of the switch statement.
   2352         if remaining_line.startswith('}') and current_indentation == switch_indentation:
   2353             break
   2354         # Case and default branches should not be indented. The regexp also
   2355         # catches single-line cases like "default: break;" but does not trigger
   2356         # on stuff like "Document::Foo();".
   2357         elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
   2358             if current_indentation != switch_indentation:
   2359                 error(line_number + line_offset, 'whitespace/indent', 4,
   2360                       'A case label should not be indented, but line up with its switch statement.')
   2361                 # Don't throw an error for multiple badly indented labels,
   2362                 # one should be enough to figure out the problem.
   2363                 break
   2364         # We ignore goto labels at the very beginning of a line.
   2365         elif match(r'\w+\s*:\s*$', remaining_line):
   2366             continue
   2367         # It's not a goto label, so check if it's indented at least as far as
   2368         # the switch statement plus one more level of indentation.
   2369         elif not current_indentation.startswith(inner_indentation):
   2370             error(line_number + line_offset, 'whitespace/indent', 4,
   2371                   'Non-label code inside switch statements should be indented.')
   2372             # Don't throw an error for multiple badly indented statements,
   2373             # one should be enough to figure out the problem.
   2374             break
   2375 
   2376         if encountered_nested_switch:
   2377             break
   2378 
   2379 
   2380 def check_braces(clean_lines, line_number, error):
   2381     """Looks for misplaced braces (e.g. at the end of line).
   2382 
   2383     Args:
   2384       clean_lines: A CleansedLines instance containing the file.
   2385       line_number: The number of the line to check.
   2386       error: The function to call with any errors found.
   2387     """
   2388 
   2389     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2390 
   2391     if match(r'\s*{\s*$', line):
   2392         # We allow an open brace to start a line in the case where someone
   2393         # is using braces for function definition or in a block to
   2394         # explicitly create a new scope, which is commonly used to control
   2395         # the lifetime of stack-allocated variables.  We don't detect this
   2396         # perfectly: we just don't complain if the last non-whitespace
   2397         # character on the previous non-blank line is ';', ':', '{', '}',
   2398         # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
   2399         # We also allow '#' for #endif and '=' for array initialization.
   2400         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2401         if ((not search(r'[;:}{)=]\s*$|\)\s*((const|OVERRIDE)\s*)*\s*$', previous_line)
   2402              or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
   2403             and previous_line.find('#') < 0):
   2404             error(line_number, 'whitespace/braces', 4,
   2405                   'This { should be at the end of the previous line')
   2406     elif (search(r'\)\s*(((const|OVERRIDE)\s*)*\s*)?{\s*$', line)
   2407           and line.count('(') == line.count(')')
   2408           and not search(r'\b(if|for|foreach|while|switch)\b', line)
   2409           and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
   2410         error(line_number, 'whitespace/braces', 4,
   2411               'Place brace on its own line for function definitions.')
   2412 
   2413     # An else clause should be on the same line as the preceding closing brace.
   2414     if match(r'\s*else\s*', line):
   2415         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2416         if match(r'\s*}\s*$', previous_line):
   2417             error(line_number, 'whitespace/newline', 4,
   2418                   'An else should appear on the same line as the preceding }')
   2419 
   2420     # Likewise, an else should never have the else clause on the same line
   2421     if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
   2422         error(line_number, 'whitespace/newline', 4,
   2423               'Else clause should never be on same line as else (use 2 lines)')
   2424 
   2425     # In the same way, a do/while should never be on one line
   2426     if match(r'\s*do [^\s{]', line):
   2427         error(line_number, 'whitespace/newline', 4,
   2428               'do/while clauses should not be on a single line')
   2429 
   2430     # Braces shouldn't be followed by a ; unless they're defining a struct
   2431     # or initializing an array.
   2432     # We can't tell in general, but we can for some common cases.
   2433     previous_line_number = line_number
   2434     while True:
   2435         (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
   2436         if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
   2437             line = previous_line + line
   2438         else:
   2439             break
   2440     if (search(r'{.*}\s*;', line)
   2441         and line.count('{') == line.count('}')
   2442         and not search(r'struct|class|enum|\s*=\s*{', line)):
   2443         error(line_number, 'readability/braces', 4,
   2444               "You don't need a ; after a }")
   2445 
   2446 
   2447 def check_exit_statement_simplifications(clean_lines, line_number, error):
   2448     """Looks for else or else-if statements that should be written as an
   2449     if statement when the prior if concludes with a return, break, continue or
   2450     goto statement.
   2451 
   2452     Args:
   2453       clean_lines: A CleansedLines instance containing the file.
   2454       line_number: The number of the line to check.
   2455       error: The function to call with any errors found.
   2456     """
   2457 
   2458     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2459 
   2460     else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
   2461     if not else_match:
   2462         return
   2463 
   2464     else_indentation = else_match.group('else_indentation')
   2465     inner_indentation = else_indentation + ' ' * 4
   2466 
   2467     previous_lines = clean_lines.elided[:line_number]
   2468     previous_lines.reverse()
   2469     line_offset = 0
   2470     encountered_exit_statement = False
   2471 
   2472     for current_line in previous_lines:
   2473         line_offset -= 1
   2474 
   2475         # Skip not only empty lines but also those with preprocessor directives
   2476         # and goto labels.
   2477         if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line):
   2478             continue
   2479 
   2480         # Skip lines with closing braces on the original indentation level.
   2481         # Even though the styleguide says they should be on the same line as
   2482         # the "else if" statement, we also want to check for instances where
   2483         # the current code does not comply with the coding style. Thus, ignore
   2484         # these lines and proceed to the line before that.
   2485         if current_line == else_indentation + '}':
   2486             continue
   2487 
   2488         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2489         current_indentation = current_indentation_match.group('indentation')
   2490         remaining_line = current_indentation_match.group('remaining_line')
   2491 
   2492         # As we're going up the lines, the first real statement to encounter
   2493         # has to be an exit statement (return, break, continue or goto) -
   2494         # otherwise, this check doesn't apply.
   2495         if not encountered_exit_statement:
   2496             # We only want to find exit statements if they are on exactly
   2497             # the same level of indentation as expected from the code inside
   2498             # the block. If the indentation doesn't strictly match then we
   2499             # might have a nested if or something, which must be ignored.
   2500             if current_indentation != inner_indentation:
   2501                 break
   2502             if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line):
   2503                 encountered_exit_statement = True
   2504                 continue
   2505             break
   2506 
   2507         # When code execution reaches this point, we've found an exit statement
   2508         # as last statement of the previous block. Now we only need to make
   2509         # sure that the block belongs to an "if", then we can throw an error.
   2510 
   2511         # Skip lines with opening braces on the original indentation level,
   2512         # similar to the closing braces check above. ("if (condition)\n{")
   2513         if current_line == else_indentation + '{':
   2514             continue
   2515 
   2516         # Skip everything that's further indented than our "else" or "else if".
   2517         if current_indentation.startswith(else_indentation) and current_indentation != else_indentation:
   2518             continue
   2519 
   2520         # So we've got a line with same (or less) indentation. Is it an "if"?
   2521         # If yes: throw an error. If no: don't throw an error.
   2522         # Whatever the outcome, this is the end of our loop.
   2523         if match(r'if\s*\(', remaining_line):
   2524             if else_match.start('else') != -1:
   2525                 error(line_number + line_offset, 'readability/control_flow', 4,
   2526                       'An else statement can be removed when the prior "if" '
   2527                       'concludes with a return, break, continue or goto statement.')
   2528             else:
   2529                 error(line_number + line_offset, 'readability/control_flow', 4,
   2530                       'An else if statement should be written as an if statement '
   2531                       'when the prior "if" concludes with a return, break, '
   2532                       'continue or goto statement.')
   2533         break
   2534 
   2535 
   2536 def replaceable_check(operator, macro, line):
   2537     """Determine whether a basic CHECK can be replaced with a more specific one.
   2538 
   2539     For example suggest using CHECK_EQ instead of CHECK(a == b) and
   2540     similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   2541 
   2542     Args:
   2543       operator: The C++ operator used in the CHECK.
   2544       macro: The CHECK or EXPECT macro being called.
   2545       line: The current source line.
   2546 
   2547     Returns:
   2548       True if the CHECK can be replaced with a more specific one.
   2549     """
   2550 
   2551     # This matches decimal and hex integers, strings, and chars (in that order).
   2552     match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   2553 
   2554     # Expression to match two sides of the operator with something that
   2555     # looks like a literal, since CHECK(x == iterator) won't compile.
   2556     # This means we can't catch all the cases where a more specific
   2557     # CHECK is possible, but it's less annoying than dealing with
   2558     # extraneous warnings.
   2559     match_this = (r'\s*' + macro + r'\((\s*' +
   2560                   match_constant + r'\s*' + operator + r'[^<>].*|'
   2561                   r'.*[^<>]' + operator + r'\s*' + match_constant +
   2562                   r'\s*\))')
   2563 
   2564     # Don't complain about CHECK(x == NULL) or similar because
   2565     # CHECK_EQ(x, NULL) won't compile (requires a cast).
   2566     # Also, don't complain about more complex boolean expressions
   2567     # involving && or || such as CHECK(a == b || c == d).
   2568     return match(match_this, line) and not search(r'NULL|&&|\|\|', line)
   2569 
   2570 
   2571 def check_check(clean_lines, line_number, error):
   2572     """Checks the use of CHECK and EXPECT macros.
   2573 
   2574     Args:
   2575       clean_lines: A CleansedLines instance containing the file.
   2576       line_number: The number of the line to check.
   2577       error: The function to call with any errors found.
   2578     """
   2579 
   2580     # Decide the set of replacement macros that should be suggested
   2581     raw_lines = clean_lines.raw_lines
   2582     current_macro = ''
   2583     for macro in _CHECK_MACROS:
   2584         if raw_lines[line_number].find(macro) >= 0:
   2585             current_macro = macro
   2586             break
   2587     if not current_macro:
   2588         # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   2589         return
   2590 
   2591     line = clean_lines.elided[line_number]        # get rid of comments and strings
   2592 
   2593     # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   2594     for operator in ['==', '!=', '>=', '>', '<=', '<']:
   2595         if replaceable_check(operator, current_macro, line):
   2596             error(line_number, 'readability/check', 2,
   2597                   'Consider using %s instead of %s(a %s b)' % (
   2598                       _CHECK_REPLACEMENT[current_macro][operator],
   2599                       current_macro, operator))
   2600             break
   2601 
   2602 
   2603 def check_for_comparisons_to_boolean(clean_lines, line_number, error):
   2604     # Get the line without comments and strings.
   2605     line = clean_lines.elided[line_number]
   2606 
   2607     # Must include NULL here, as otherwise users will convert NULL to 0 and
   2608     # then we can't catch it, since it looks like a valid integer comparison.
   2609     if search(r'[=!]=\s*(NULL|nullptr|true|false)[^\w.]', line) or search(r'[^\w.](NULL|nullptr|true|false)\s*[=!]=', line):
   2610         if not search('LIKELY', line) and not search('UNLIKELY', line):
   2611             error(line_number, 'readability/comparison_to_boolean', 5,
   2612                   'Tests for true/false and null/non-null should be done without equality comparisons.')
   2613 
   2614 
   2615 def check_for_null(clean_lines, line_number, file_state, error):
   2616     # This check doesn't apply to C or Objective-C implementation files.
   2617     if file_state.is_c_or_objective_c():
   2618         return
   2619 
   2620     line = clean_lines.elided[line_number]
   2621 
   2622     # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372.
   2623     if search(r'\bg(_[a-z]+)+\b', line):
   2624         return
   2625 
   2626     # Don't warn about NULL usage in gst_*(). See Bug 70498.
   2627     if search(r'\bgst(_[a-z]+)+\b', line):
   2628         return
   2629 
   2630     # Don't warn about NULL usage in gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090.
   2631     if search(r'\bgdk_pixbuf_save_to\w+\b', line):
   2632         return
   2633 
   2634     # Don't warn about NULL usage in gtk_widget_style_get(), gtk_style_context_get_style(), or gtk_style_context_get(). See Bug 51758
   2635     if search(r'\bgtk_widget_style_get\(\w+\b', line) or search(r'\bgtk_style_context_get_style\(\w+\b', line) or search(r'\bgtk_style_context_get\(\w+\b', line):
   2636         return
   2637 
   2638     # Don't warn about NULL usage in soup_server_new(). See Bug 77890.
   2639     if search(r'\bsoup_server_new\(\w+\b', line):
   2640         return
   2641 
   2642     if search(r'\bNULL\b', line):
   2643         error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
   2644         return
   2645 
   2646     line = clean_lines.raw_lines[line_number]
   2647     # See if NULL occurs in any comments in the line. If the search for NULL using the raw line
   2648     # matches, then do the check with strings collapsed to avoid giving errors for
   2649     # NULLs occurring in strings.
   2650     if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)):
   2651         error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).')
   2652 
   2653 def get_line_width(line):
   2654     """Determines the width of the line in column positions.
   2655 
   2656     Args:
   2657       line: A string, which may be a Unicode string.
   2658 
   2659     Returns:
   2660       The width of the line in column positions, accounting for Unicode
   2661       combining characters and wide characters.
   2662     """
   2663     if isinstance(line, unicode):
   2664         width = 0
   2665         for c in unicodedata.normalize('NFC', line):
   2666             if unicodedata.east_asian_width(c) in ('W', 'F'):
   2667                 width += 2
   2668             elif not unicodedata.combining(c):
   2669                 width += 1
   2670         return width
   2671     return len(line)
   2672 
   2673 
   2674 def check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line_number, error):
   2675     """Scans the bodies of conditionals and loops, and in particular
   2676     all the arms of conditionals, for violations in the use of braces.
   2677 
   2678     Specifically:
   2679 
   2680     (1) If an arm omits braces, then the following statement must be on one
   2681     physical line.
   2682     (2) If any arm uses braces, all arms must use them.
   2683 
   2684     These checks are only done here if we find the start of an
   2685     'if/for/foreach/while' statement, because this function fails fast
   2686     if it encounters constructs it doesn't understand. Checks
   2687     elsewhere validate other constraints, such as requiring '}' and
   2688     'else' to be on the same line.
   2689 
   2690     Args:
   2691       clean_lines: A CleansedLines instance containing the file.
   2692       line_number: The number of the line to check.
   2693       error: The function to call with any errors found.
   2694     """
   2695 
   2696     # We work with the elided lines. Comments have been removed, but line
   2697     # numbers are preserved, so we can still find situations where
   2698     # single-expression control clauses span multiple lines, or when a
   2699     # comment preceded the expression.
   2700     lines = clean_lines.elided
   2701     line = lines[line_number]
   2702 
   2703     # Match control structures.
   2704     control_match = match(r'\s*(if|foreach|for|while)\s*\(', line)
   2705     if not control_match:
   2706         return
   2707 
   2708     # Found the start of a conditional or loop.
   2709 
   2710     # The following loop handles all potential arms of the control clause.
   2711     # The initial conditions are the following:
   2712     #   - We start on the opening paren '(' of the condition, *unless* we are
   2713     #     handling an 'else' block, in which case there is no condition.
   2714     #   - In the latter case, we start at the position just beyond the 'else'
   2715     #     token.
   2716     expect_conditional_expression = True
   2717     know_whether_using_braces = False
   2718     using_braces = False
   2719     search_for_else_clause = control_match.group(1) == "if"
   2720     current_pos = Position(line_number, control_match.end() - 1)
   2721 
   2722     while True:
   2723         if expect_conditional_expression:
   2724             # Try to find the end of the conditional expression,
   2725             # potentially spanning multiple lines.
   2726             open_paren_pos = current_pos
   2727             close_paren_pos = close_expression(lines, open_paren_pos)
   2728             if close_paren_pos.column < 0:
   2729                 return
   2730             current_pos = close_paren_pos
   2731 
   2732         end_line_of_conditional = current_pos.row
   2733 
   2734         # Find the start of the body.
   2735         current_pos = _find_in_lines(r'\S', lines, current_pos, None)
   2736         if not current_pos:
   2737             return
   2738 
   2739         current_arm_uses_brace = False
   2740         if lines[current_pos.row][current_pos.column] == '{':
   2741             current_arm_uses_brace = True
   2742         if know_whether_using_braces:
   2743             if using_braces != current_arm_uses_brace:
   2744                 error(current_pos.row, 'whitespace/braces', 4,
   2745                       'If one part of an if-else statement uses curly braces, the other part must too.')
   2746                 return
   2747         know_whether_using_braces = True
   2748         using_braces = current_arm_uses_brace
   2749 
   2750         if using_braces:
   2751             # Skip over the entire arm.
   2752             current_pos = close_expression(lines, current_pos)
   2753             if current_pos.column < 0:
   2754                 return
   2755         else:
   2756             # Skip over the current expression.
   2757             current_line_number = current_pos.row
   2758             current_pos = _find_in_lines(r';', lines, current_pos, None)
   2759             if not current_pos:
   2760                 return
   2761             # If the end of the expression is beyond the line just after
   2762             # the close parenthesis or control clause, we've found a
   2763             # single-expression arm that spans multiple lines. (We don't
   2764             # fire this error for expressions ending on the same line; that
   2765             # is a different error, handled elsewhere.)
   2766             if current_pos.row > 1 + end_line_of_conditional:
   2767                 error(current_pos.row, 'whitespace/braces', 4,
   2768                       'A conditional or loop body must use braces if the statement is more than one line long.')
   2769                 return
   2770             current_pos = Position(current_pos.row, 1 + current_pos.column)
   2771 
   2772         # At this point current_pos points just past the end of the last
   2773         # arm. If we just handled the last control clause, we're done.
   2774         if not search_for_else_clause:
   2775             return
   2776 
   2777         # Scan forward for the next non-whitespace character, and see
   2778         # whether we are continuing a conditional (with an 'else' or
   2779         # 'else if'), or are done.
   2780         current_pos = _find_in_lines(r'\S', lines, current_pos, None)
   2781         if not current_pos:
   2782             return
   2783         next_nonspace_string = lines[current_pos.row][current_pos.column:]
   2784         next_conditional = match(r'(else\s*if|else)', next_nonspace_string)
   2785         if not next_conditional:
   2786             # Done processing this 'if' and all arms.
   2787             return
   2788         if next_conditional.group(1) == "else if":
   2789             current_pos = _find_in_lines(r'\(', lines, current_pos, None)
   2790         else:
   2791             current_pos.column += 4  # skip 'else'
   2792             expect_conditional_expression = False
   2793             search_for_else_clause = False
   2794     # End while loop
   2795 
   2796 def check_style(clean_lines, line_number, file_extension, class_state, file_state, enum_state, error):
   2797     """Checks rules from the 'C++ style rules' section of cppguide.html.
   2798 
   2799     Most of these rules are hard to test (naming, comment style), but we
   2800     do what we can.  In particular we check for 4-space indents, line lengths,
   2801     tab usage, spaces inside code, etc.
   2802 
   2803     Args:
   2804       clean_lines: A CleansedLines instance containing the file.
   2805       line_number: The number of the line to check.
   2806       file_extension: The extension (without the dot) of the filename.
   2807       class_state: A _ClassState instance which maintains information about
   2808                    the current stack of nested class declarations being parsed.
   2809       file_state: A _FileState instance which maintains information about
   2810                   the state of things in the file.
   2811       enum_state: A _EnumState instance which maintains the current enum state.
   2812       error: The function to call with any errors found.
   2813     """
   2814 
   2815     raw_lines = clean_lines.raw_lines
   2816     line = raw_lines[line_number]
   2817 
   2818     if line.find('\t') != -1:
   2819         error(line_number, 'whitespace/tab', 1,
   2820               'Tab found; better to use spaces')
   2821 
   2822     cleansed_line = clean_lines.elided[line_number]
   2823     if line and line[-1].isspace():
   2824         error(line_number, 'whitespace/end_of_line', 4,
   2825               'Line ends in whitespace.  Consider deleting these extra spaces.')
   2826 
   2827     if (cleansed_line.count(';') > 1
   2828         # for loops are allowed two ;'s (and may run over two lines).
   2829         and cleansed_line.find('for') == -1
   2830         and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1
   2831              or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1)
   2832         # It's ok to have many commands in a switch case that fits in 1 line
   2833         and not ((cleansed_line.find('case ') != -1
   2834                   or cleansed_line.find('default:') != -1)
   2835                  and cleansed_line.find('break;') != -1)
   2836         # Also it's ok to have many commands in trivial single-line accessors in class definitions.
   2837         and not (match(r'.*\(.*\).*{.*.}', line)
   2838                  and class_state.classinfo_stack
   2839                  and line.count('{') == line.count('}'))
   2840         and not cleansed_line.startswith('#define ')
   2841         # It's ok to use use WTF_MAKE_NONCOPYABLE and WTF_MAKE_FAST_ALLOCATED macros in 1 line
   2842         and not (cleansed_line.find("WTF_MAKE_NONCOPYABLE") != -1
   2843                  and cleansed_line.find("WTF_MAKE_FAST_ALLOCATED") != -1)):
   2844         error(line_number, 'whitespace/newline', 4,
   2845               'More than one command on the same line')
   2846 
   2847     if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'):
   2848         error(line_number, 'whitespace/operators', 4,
   2849               'Boolean expressions that span multiple lines should have their '
   2850               'operators on the left side of the line instead of the right side.')
   2851 
   2852     # Some more style checks
   2853     check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
   2854     check_directive_indentation(clean_lines, line_number, file_state, error)
   2855     check_using_std(clean_lines, line_number, file_state, error)
   2856     check_max_min_macros(clean_lines, line_number, file_state, error)
   2857     check_ctype_functions(clean_lines, line_number, file_state, error)
   2858     check_switch_indentation(clean_lines, line_number, error)
   2859     check_braces(clean_lines, line_number, error)
   2860     check_exit_statement_simplifications(clean_lines, line_number, error)
   2861     check_spacing(file_extension, clean_lines, line_number, error)
   2862     check_check(clean_lines, line_number, error)
   2863     check_for_comparisons_to_boolean(clean_lines, line_number, error)
   2864     check_for_null(clean_lines, line_number, file_state, error)
   2865     check_indentation_amount(clean_lines, line_number, error)
   2866     check_enum_casing(clean_lines, line_number, enum_state, error)
   2867 
   2868 
# Matches a quoted #include of a ".h" file whose quoted name contains no '/',
# e.g. #include "Foo.h".
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter (< or "),
# group 2 is the text between the delimiters, i.e. the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2877 
   2878 
   2879 def _drop_common_suffixes(filename):
   2880     """Drops common suffixes like _test.cpp or -inl.h from filename.
   2881 
   2882     For example:
   2883       >>> _drop_common_suffixes('foo/foo-inl.h')
   2884       'foo/foo'
   2885       >>> _drop_common_suffixes('foo/bar/foo.cpp')
   2886       'foo/bar/foo'
   2887       >>> _drop_common_suffixes('foo/foo_internal.h')
   2888       'foo/foo'
   2889       >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
   2890       'foo/foo_unusualinternal'
   2891 
   2892     Args:
   2893       filename: The input filename.
   2894 
   2895     Returns:
   2896       The filename with the common suffix removed.
   2897     """
   2898     for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
   2899                    'inl.h', 'impl.h', 'internal.h'):
   2900         if (filename.endswith(suffix) and len(filename) > len(suffix)
   2901             and filename[-len(suffix) - 1] in ('-', '_')):
   2902             return filename[:-len(suffix) - 1]
   2903     return os.path.splitext(filename)[0]
   2904 
   2905 
   2906 def _classify_include(filename, include, is_system, include_state):
   2907     """Figures out what kind of header 'include' is.
   2908 
   2909     Args:
   2910       filename: The current file cpp_style is running over.
   2911       include: The path to a #included file.
   2912       is_system: True if the #include used <> rather than "".
   2913       include_state: An _IncludeState instance in which the headers are inserted.
   2914 
   2915     Returns:
   2916       One of the _XXX_HEADER constants.
   2917 
   2918     For example:
   2919       >>> _classify_include('foo.cpp', 'config.h', False)
   2920       _CONFIG_HEADER
   2921       >>> _classify_include('foo.cpp', 'foo.h', False)
   2922       _PRIMARY_HEADER
   2923       >>> _classify_include('foo.cpp', 'bar.h', False)
   2924       _OTHER_HEADER
   2925     """
   2926 
   2927     # If it is a system header we know it is classified as _OTHER_HEADER.
   2928     if is_system and not include.startswith('public/'):
   2929         return _OTHER_HEADER
   2930 
   2931     # If the include is named config.h then this is WebCore/config.h.
   2932     if include == "config.h":
   2933         return _CONFIG_HEADER
   2934 
   2935     # There cannot be primary includes in header files themselves. Only an
   2936     # include exactly matches the header filename will be is flagged as
   2937     # primary, so that it triggers the "don't include yourself" check.
   2938     if filename.endswith('.h') and filename != include:
   2939         return _OTHER_HEADER;
   2940 
   2941     # Qt's moc files do not follow the naming and ordering rules, so they should be skipped
   2942     if include.startswith('moc_') and include.endswith('.cpp'):
   2943         return _MOC_HEADER
   2944 
   2945     if include.endswith('.moc'):
   2946         return _MOC_HEADER
   2947 
   2948     # If the target file basename starts with the include we're checking
   2949     # then we consider it the primary header.
   2950     target_base = FileInfo(filename).base_name()
   2951     include_base = FileInfo(include).base_name()
   2952 
   2953     # If we haven't encountered a primary header, then be lenient in checking.
   2954     if not include_state.visited_primary_section():
   2955         if target_base.find(include_base) != -1:
   2956             return _PRIMARY_HEADER
   2957         # Qt private APIs use _p.h suffix.
   2958         if include_base.find(target_base) != -1 and include_base.endswith('_p'):
   2959             return _PRIMARY_HEADER
   2960 
   2961     # If we already encountered a primary header, perform a strict comparison.
   2962     # In case the two filename bases are the same then the above lenient check
   2963     # probably was a false positive.
   2964     elif include_state.visited_primary_section() and target_base == include_base:
   2965         if include == "ResourceHandleWin.h":
   2966             # FIXME: Thus far, we've only seen one example of these, but if we
   2967             # start to see more, please consider generalizing this check
   2968             # somehow.
   2969             return _OTHER_HEADER
   2970         return _PRIMARY_HEADER
   2971 
   2972     return _OTHER_HEADER
   2973 
   2974 
   2975 def _does_primary_header_exist(filename):
   2976     """Return a primary header file name for a file, or empty string
   2977     if the file is not source file or primary header does not exist.
   2978     """
   2979     fileinfo = FileInfo(filename)
   2980     if not fileinfo.is_source():
   2981         return False
   2982     primary_header = fileinfo.no_extension() + ".h"
   2983     return os.path.isfile(primary_header)
   2984 
   2985 
   2986 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
   2987     """Check rules that are applicable to #include lines.
   2988 
   2989     Strings on #include lines are NOT removed from elided line, to make
   2990     certain tasks easier. However, to prevent false positives, checks
   2991     applicable to #include lines in CheckLanguage must be put here.
   2992 
   2993     Args:
   2994       filename: The name of the current file.
   2995       file_extension: The current file extension, without the leading dot.
   2996       clean_lines: A CleansedLines instance containing the file.
   2997       line_number: The number of the line to check.
   2998       include_state: An _IncludeState instance in which the headers are inserted.
   2999       error: The function to call with any errors found.
   3000     """
   3001     # FIXME: For readability or as a possible optimization, consider
   3002     #        exiting early here by checking whether the "build/include"
   3003     #        category should be checked for the given filename.  This
   3004     #        may involve having the error handler classes expose a
   3005     #        should_check() method, in addition to the usual __call__
   3006     #        method.
   3007     line = clean_lines.lines[line_number]
   3008 
   3009     matched = _RE_PATTERN_INCLUDE.search(line)
   3010     if not matched:
   3011         return
   3012 
   3013     include = matched.group(2)
   3014     is_system = (matched.group(1) == '<')
   3015 
   3016     # Look for any of the stream classes that are part of standard C++.
   3017     if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   3018         error(line_number, 'readability/streams', 3,
   3019               'Streams are highly discouraged.')
   3020 
   3021     # Look for specific includes to fix.
   3022     if include.startswith('wtf/') and is_system:
   3023         error(line_number, 'build/include', 4,
   3024               'wtf includes should be "wtf/file.h" instead of <wtf/file.h>.')
   3025 
   3026     if filename.find('/chromium/') != -1 and include.startswith('cc/CC'):
   3027         error(line_number, 'build/include', 4,
   3028               'cc includes should be "CCFoo.h" instead of "cc/CCFoo.h".')
   3029 
   3030     duplicate_header = include in include_state
   3031     if duplicate_header:
   3032         error(line_number, 'build/include', 4,
   3033               '"%s" already included at %s:%s' %
   3034               (include, filename, include_state[include]))
   3035     else:
   3036         include_state[include] = line_number
   3037 
   3038     header_type = _classify_include(filename, include, is_system, include_state)
   3039     primary_header_exists = _does_primary_header_exist(filename)
   3040     include_state.header_types[line_number] = header_type
   3041 
   3042     # Only proceed if this isn't a duplicate header.
   3043     if duplicate_header:
   3044         return
   3045 
   3046     # We want to ensure that headers appear in the right order:
   3047     # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
   3048     # 2) for header files: alphabetically sorted
   3049     # The include_state object keeps track of the last type seen
   3050     # and complains if the header types are out of order or missing.
   3051     error_message = include_state.check_next_include_order(header_type,
   3052                                                            file_extension == "h",
   3053                                                            primary_header_exists)
   3054 
   3055     # Check to make sure we have a blank line after primary header.
   3056     if not error_message and header_type == _PRIMARY_HEADER:
   3057          next_line = clean_lines.raw_lines[line_number + 1]
   3058          if not is_blank_line(next_line):
   3059             error(line_number, 'build/include_order', 4,
   3060                   'You should add a blank line after implementation file\'s own header.')
   3061 
   3062     # Check to make sure all headers besides config.h and the primary header are
   3063     # alphabetically sorted. Skip Qt's moc files.
   3064     if not error_message and header_type == _OTHER_HEADER:
   3065          previous_line_number = line_number - 1;
   3066          previous_line = clean_lines.lines[previous_line_number]
   3067          previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   3068          while (not previous_match and previous_line_number > 0
   3069                 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
   3070             previous_line_number -= 1;
   3071             previous_line = clean_lines.lines[previous_line_number]
   3072             previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   3073          if previous_match:
   3074             previous_header_type = include_state.header_types[previous_line_number]
   3075             if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
   3076                 # This type of error is potentially a problem with this line or the previous one,
   3077                 # so if the error is filtered for one line, report it for the next. This is so that
   3078                 # we properly handle patches, for which only modified lines produce errors.
   3079                 if not error(line_number - 1, 'build/include_order', 4, 'Alphabetical sorting problem.'):
   3080                     error(line_number, 'build/include_order', 4, 'Alphabetical sorting problem.')
   3081 
   3082     if error_message:
   3083         if file_extension == 'h':
   3084             error(line_number, 'build/include_order', 4,
   3085                   '%s Should be: alphabetically sorted.' %
   3086                   error_message)
   3087         else:
   3088             error(line_number, 'build/include_order', 4,
   3089                   '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
   3090                   error_message)
   3091 
   3092 
   3093 def check_language(filename, clean_lines, line_number, file_extension, include_state,
   3094                    file_state, error):
   3095     """Checks rules from the 'C++ language rules' section of cppguide.html.
   3096 
   3097     Some of these rules are hard to test (function overloading, using
   3098     uint32 inappropriately), but we do the best we can.
   3099 
   3100     Args:
   3101       filename: The name of the current file.
   3102       clean_lines: A CleansedLines instance containing the file.
   3103       line_number: The number of the line to check.
   3104       file_extension: The extension (without the dot) of the filename.
   3105       include_state: An _IncludeState instance in which the headers are inserted.
   3106       file_state: A _FileState instance which maintains information about
   3107                   the state of things in the file.
   3108       error: The function to call with any errors found.
   3109     """
   3110     # If the line is empty or consists of entirely a comment, no need to
   3111     # check it.
   3112     line = clean_lines.elided[line_number]
   3113     if not line:
   3114         return
   3115 
   3116     matched = _RE_PATTERN_INCLUDE.search(line)
   3117     if matched:
   3118         check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
   3119         return
   3120 
   3121     # FIXME: figure out if they're using default arguments in fn proto.
   3122 
   3123     # Check to see if they're using an conversion function cast.
   3124     # I just try to capture the most common basic types, though there are more.
   3125     # Parameterless conversion functions, such as bool(), are allowed as they are
   3126     # probably a member operator declaration or default constructor.
   3127     matched = search(
   3128         r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   3129     if matched:
   3130         # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   3131         # where type may be float(), int(string), etc.  Without context they are
   3132         # virtually indistinguishable from int(x) casts.
   3133         if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
   3134             error(line_number, 'readability/casting', 4,
   3135                   'Using deprecated casting style.  '
   3136                   'Use static_cast<%s>(...) instead' %
   3137                   matched.group(1))
   3138 
   3139     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   3140                        'static_cast',
   3141                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
   3142                        error)
   3143     # This doesn't catch all cases.  Consider (const char * const)"hello".
   3144     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   3145                        'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   3146 
   3147     # In addition, we look for people taking the address of a cast.  This
   3148     # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   3149     # point where you think.
   3150     if search(
   3151         r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   3152         error(line_number, 'runtime/casting', 4,
   3153               ('Are you taking an address of a cast?  '
   3154                'This is dangerous: could be a temp var.  '
   3155                'Take the address before doing the cast, rather than after'))
   3156 
   3157     # Check for people declaring static/global STL strings at the top level.
   3158     # This is dangerous because the C++ language does not guarantee that
   3159     # globals with constructors are initialized before the first access.
   3160     matched = match(
   3161         r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   3162         line)
   3163     # Make sure it's not a function.
   3164     # Function template specialization looks like: "string foo<Type>(...".
   3165     # Class template definitions look like: "string Foo<Type>::Method(...".
   3166     if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   3167                              matched.group(3)):
   3168         error(line_number, 'runtime/string', 4,
   3169               'For a static/global string constant, use a C style string instead: '
   3170               '"%schar %s[]".' %
   3171               (matched.group(1), matched.group(2)))
   3172 
   3173     # Check that we're not using RTTI outside of testing code.
   3174     if search(r'\bdynamic_cast<', line):
   3175         error(line_number, 'runtime/rtti', 5,
   3176               'Do not use dynamic_cast<>.  If you need to cast within a class '
   3177               "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   3178               'RTTI.')
   3179 
   3180     if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   3181         error(line_number, 'runtime/init', 4,
   3182               'You seem to be initializing a member variable with itself.')
   3183 
   3184     if file_extension == 'h':
   3185         # FIXME: check that 1-arg constructors are explicit.
   3186         #        How to tell it's a constructor?
   3187         #        (handled in check_for_non_standard_constructs for now)
   3188         pass
   3189 
   3190     # Check if people are using the verboten C basic types.  The only exception
   3191     # we regularly allow is "unsigned short port" for port.
   3192     if search(r'\bshort port\b', line):
   3193         if not search(r'\bunsigned short port\b', line):
   3194             error(line_number, 'runtime/int', 4,
   3195                   'Use "unsigned short" for ports, not "short"')
   3196 
   3197     # When snprintf is used, the second argument shouldn't be a literal.
   3198     matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   3199     if matched:
   3200         error(line_number, 'runtime/printf', 3,
   3201               'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   3202               'to snprintf.' % (matched.group(1), matched.group(2)))
   3203 
   3204     # Check if some verboten C functions are being used.
   3205     if search(r'\bsprintf\b', line):
   3206         error(line_number, 'runtime/printf', 5,
   3207               'Never use sprintf.  Use snprintf instead.')
   3208     matched = search(r'\b(strcpy|strcat)\b', line)
   3209     if matched:
   3210         error(line_number, 'runtime/printf', 4,
   3211               'Almost always, snprintf is better than %s' % matched.group(1))
   3212 
   3213     if search(r'\bsscanf\b', line):
   3214         error(line_number, 'runtime/printf', 1,
   3215               'sscanf can be ok, but is slow and can overflow buffers.')
   3216 
   3217     # Check for suspicious usage of "if" like
   3218     # } if (a == b) {
   3219     if search(r'\}\s*if\s*\(', line):
   3220         error(line_number, 'readability/braces', 4,
   3221               'Did you mean "else if"? If not, start a new line for "if".')
   3222 
   3223     # Check for potential format string bugs like printf(foo).
   3224     # We constrain the pattern not to pick things like DocidForPrintf(foo).
   3225     # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   3226     matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
   3227     if matched:
   3228         error(line_number, 'runtime/printf', 4,
   3229               'Potential format string bug. Do %s("%%s", %s) instead.'
   3230               % (matched.group(1), matched.group(2)))
   3231 
   3232     # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   3233     matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   3234     if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
   3235         error(line_number, 'runtime/memset', 4,
   3236               'Did you mean "memset(%s, 0, %s)"?'
   3237               % (matched.group(1), matched.group(2)))
   3238 
   3239     # Detect variable-length arrays.
   3240     matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   3241     if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
   3242         matched.group(3).find(']') == -1):
   3243         # Split the size using space and arithmetic operators as delimiters.
   3244         # If any of the resulting tokens are not compile time constants then
   3245         # report the error.
   3246         tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3))
   3247         is_const = True
   3248         skip_next = False
   3249         for tok in tokens:
   3250             if skip_next:
   3251                 skip_next = False
   3252                 continue
   3253 
   3254             if search(r'sizeof\(.+\)', tok):
   3255                 continue
   3256             if search(r'arraysize\(\w+\)', tok):
   3257                 continue
   3258 
   3259             tok = tok.lstrip('(')
   3260             tok = tok.rstrip(')')
   3261             if not tok:
   3262                 continue
   3263             if match(r'\d+', tok):
   3264                 continue
   3265             if match(r'0[xX][0-9a-fA-F]+', tok):
   3266                 continue
   3267             if match(r'k[A-Z0-9]\w*', tok):
   3268                 continue
   3269             if match(r'(.+::)?k[A-Z0-9]\w*', tok):
   3270                 continue
   3271             if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
   3272                 continue
   3273             # A catch all for tricky sizeof cases, including 'sizeof expression',
   3274             # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   3275             # requires skipping the next token becasue we split on ' ' and '*'.
   3276             if tok.startswith('sizeof'):
   3277                 skip_next = True
   3278                 continue
   3279             is_const = False
   3280             break
   3281         if not is_const:
   3282             error(line_number, 'runtime/arrays', 1,
   3283                   'Do not use variable-length arrays.  Use an appropriately named '
   3284                   "('k' followed by CamelCase) compile-time constant for the size.")
   3285 
   3286     # Check for use of unnamed namespaces in header files.  Registration
   3287     # macros are typically OK, so we allow use of "namespace {" on lines
   3288     # that end with backslashes.
   3289     if (file_extension == 'h'
   3290         and search(r'\bnamespace\s*{', line)
   3291         and line[-1] != '\\'):
   3292         error(line_number, 'build/namespaces', 4,
   3293               'Do not use unnamed namespaces in header files.  See '
   3294               'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   3295               ' for more information.')
   3296 
   3297     # Check for plain bitfields declared without either "singed" or "unsigned".
   3298     # Most compilers treat such bitfields as signed, but there are still compilers like
   3299     # RVCT 4.0 that use unsigned by default.
   3300     matched = re.match(r'\s*((const|mutable)\s+)?(char|(short(\s+int)?)|int|long(\s+(long|int))?)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:\s*\d+\s*;', line)
   3301     if matched:
   3302         error(line_number, 'runtime/bitfields', 5,
   3303               'Please declare integral type bitfields with either signed or unsigned.')
   3304 
   3305     check_identifier_name_in_declaration(filename, line_number, line, file_state, error)
   3306 
   3307     # Check for unsigned int (should be just 'unsigned')
   3308     if search(r'\bunsigned int\b', line):
   3309         error(line_number, 'runtime/unsigned', 1,
   3310               'Omit int when using unsigned')
   3311 
   3312     # Check for usage of static_cast<Classname*>.
   3313     check_for_object_static_cast(filename, line_number, line, error)
   3314 
   3315 
   3316 def check_identifier_name_in_declaration(filename, line_number, line, file_state, error):
   3317     """Checks if identifier names contain any underscores.
   3318 
   3319     As identifiers in libraries we are using have a bunch of
   3320     underscores, we only warn about the declarations of identifiers
   3321     and don't check use of identifiers.
   3322 
   3323     Args:
   3324       filename: The name of the current file.
   3325       line_number: The number of the line to check.
   3326       line: The line of code to check.
   3327       file_state: A _FileState instance which maintains information about
   3328                   the state of things in the file.
   3329       error: The function to call with any errors found.
   3330     """
   3331     # We don't check return and delete statements and conversion operator declarations.
   3332     if match(r'\s*(return|delete|operator)\b', line):
   3333         return
   3334 
   3335     # Basically, a declaration is a type name followed by whitespaces
   3336     # followed by an identifier. The type name can be complicated
   3337     # due to type adjectives and templates. We remove them first to
   3338     # simplify the process to find declarations of identifiers.
   3339 
   3340     # Convert "long long", "long double", and "long long int" to
   3341     # simple types, but don't remove simple "long".
   3342     line = sub(r'long (long )?(?=long|double|int)', '', line)
   3343     # Convert unsigned/signed types to simple types, too.
   3344     line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line)
   3345     line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)
   3346 
   3347     # Remove "new" and "new (expr)" to simplify, too.
   3348     line = sub(r'new\s*(\([^)]*\))?', '', line)
   3349 
   3350     # Remove all template parameters by removing matching < and >.
   3351     # Loop until no templates are removed to remove nested templates.
   3352     while True:
   3353         line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
   3354         if not number_of_replacements:
   3355             break
   3356 
   3357     # Declarations of local variables can be in condition expressions
   3358     # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
   3359     # We remove the keywords and the first parenthesis.
   3360     #
   3361     # Declarations in "while", "if", and "switch" are different from
   3362     # other declarations in two aspects:
   3363     #
   3364     # - There can be only one declaration between the parentheses.
   3365     #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
   3366     # - The variable must be initialized.
   3367     #   (i.e., you cannot write "if (int i) {}")
   3368     #
   3369     # and we will need different treatments for them.
   3370     line = sub(r'^\s*for\s*\(', '', line)
   3371     line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)
   3372 
   3373     # Detect variable and functions.
   3374     type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
   3375     attribute_regexp = r'ALLOW_UNUSED'
   3376     identifier_regexp = r'(?!' + attribute_regexp + r')(?P<identifier>[\w:]+)'
   3377     maybe_bitfield_regexp = r'(:\s*\d+\s*)?'
   3378     character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
   3379     declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*(' + attribute_regexp + r')?\s*' + maybe_bitfield_regexp + character_after_identifier_regexp
   3380     declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
   3381     is_function_arguments = False
   3382     number_of_identifiers = 0
   3383     while True:
   3384         # If we are seeing the first identifier or arguments of a
   3385         # function, there should be a type name before an identifier.
   3386         if not number_of_identifiers or is_function_arguments:
   3387             declaration_regexp = declaration_with_type_regexp
   3388         else:
   3389             declaration_regexp = declaration_without_type_regexp
   3390 
   3391         matched = match(declaration_regexp, line)
   3392         if not matched:
   3393             return
   3394         identifier = matched.group('identifier')
   3395         character_after_identifier = matched.group('character_after_identifier')
   3396 
   3397         # If we removed a non-for-control statement, the character after
   3398         # the identifier should be '='. With this rule, we can avoid
   3399         # warning for cases like "if (val & INT_MAX) {".
   3400         if control_statement and character_after_identifier != '=':
   3401             return
   3402 
   3403         is_function_arguments = is_function_arguments or character_after_identifier == '('
   3404 
   3405         # Remove "m_" and "s_" to allow them.
   3406         modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
   3407         if not file_state.is_objective_c() and modified_identifier.find('_') >= 0:
   3408             # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
   3409             if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0)
   3410                 and not (filename.find('gtk') >= 0 and modified_identifier.startswith('webkit_') >= 0)
   3411                 and not modified_identifier.startswith('tst_')
   3412                 and not modified_identifier.startswith('webkit_dom_object_')
   3413                 and not modified_identifier.startswith('webkit_soup')
   3414                 and not modified_identifier.startswith('NPN_')
   3415                 and not modified_identifier.startswith('NPP_')
   3416                 and not modified_identifier.startswith('NP_')
   3417                 and not modified_identifier.startswith('qt_')
   3418                 and not modified_identifier.startswith('_q_')
   3419                 and not modified_identifier.startswith('cairo_')
   3420                 and not modified_identifier.startswith('Ecore_')
   3421                 and not modified_identifier.startswith('Eina_')
   3422                 and not modified_identifier.startswith('Evas_')
   3423                 and not modified_identifier.startswith('Ewk_')
   3424                 and not modified_identifier.startswith('cti_')
   3425                 and not modified_identifier.find('::qt_') >= 0
   3426                 and not modified_identifier.find('::_q_') >= 0
   3427                 and not modified_identifier == "const_iterator"
   3428                 and not modified_identifier == "vm_throw"
   3429                 and not modified_identifier == "DFG_OPERATION"):
   3430                 error(line_number, 'readability/naming/underscores', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
   3431 
   3432         # Check for variables named 'l', these are too easy to confuse with '1' in some fonts
   3433         if modified_identifier == 'l':
   3434             error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")
   3435 
   3436         # There can be only one declaration in non-for-control statements.
   3437         if control_statement:
   3438             return
   3439         # We should continue checking if this is a function
   3440         # declaration because we need to check its arguments.
   3441         # Also, we need to check multiple declarations.
   3442         if character_after_identifier != '(' and character_after_identifier != ',':
   3443             return
   3444 
   3445         number_of_identifiers += 1
   3446         line = line[matched.end():]
   3447 
   3448 
   3449 def check_for_toFoo_definition(filename, pattern, error):
   3450     """ Reports for using static_cast instead of toFoo convenience function.
   3451 
   3452     This function will output warnings to make sure you are actually using
   3453     the added toFoo conversion functions rather than directly hard coding
   3454     the static_cast<Classname*> call. For example, you should toHTMLELement(Node*)
   3455     to convert Node* to HTMLElement*, instead of static_cast<HTMLElement*>(Node*)
   3456 
   3457     Args:
   3458       filename: The name of the header file in which to check for toFoo definition.
   3459       pattern: The conversion function pattern to grep for.
   3460       error: The function to call with any errors found.
   3461     """
   3462     def get_abs_filepath(filename):
   3463         fileSystem = FileSystem()
   3464         base_dir = fileSystem.path_to_module(FileSystem.__module__).split('WebKit', 1)[0]
   3465         base_dir = ''.join((base_dir, 'WebKit/Source'))
   3466         for root, dirs, names in os.walk(base_dir):
   3467             if filename in names:
   3468                 return os.path.join(root, filename)
   3469         return None
   3470 
   3471     def grep(lines, pattern, error):
   3472         matches = []
   3473         function_state = None
   3474         for line_number in xrange(lines.num_lines()):
   3475             line = (lines.elided[line_number]).rstrip()
   3476             try:
   3477                 if pattern in line:
   3478                     if not function_state:
   3479                         function_state = _FunctionState(1)
   3480                     detect_functions(lines, line_number, function_state, error)
   3481                     # Exclude the match of dummy conversion function. Dummy function is just to
   3482                     # catch invalid conversions and shouldn't be part of possible alternatives.
   3483                     result = re.search(r'%s(\s+)%s' % ("void", pattern), line)
   3484                     if not result:
   3485                         matches.append([line, function_state.body_start_position.row, function_state.end_position.row + 1])
   3486                         function_state = None
   3487             except UnicodeDecodeError:
   3488                 # There would be no non-ascii characters in the codebase ever. The only exception
   3489                 # would be comments/copyright text which might have non-ascii characters. Hence,
   3490                 # it is prefectly safe to catch the UnicodeDecodeError and just pass the line.
   3491                 pass
   3492 
   3493         return matches
   3494 
   3495     def check_in_mock_header(filename, matches=None):
   3496         if not filename == 'Foo.h':
   3497             return False
   3498 
   3499         header_file = None
   3500         try:
   3501             header_file = CppChecker.fs.read_text_file(filename)
   3502         except IOError:
   3503             return False
   3504         line_number = 0
   3505         for line in header_file:
   3506             line_number += 1
   3507             matched = re.search(r'\btoFoo\b', line)
   3508             if matched:
   3509                 matches.append(['toFoo', line_number, line_number + 3])
   3510         return True
   3511 
   3512     # For unit testing only, avoid header search and lookup locally.
   3513     matches = []
   3514     mock_def_found = check_in_mock_header(filename, matches)
   3515     if mock_def_found:
   3516         return matches
   3517 
   3518     # Regular style check flow. Search for actual header file & defs.
   3519     file_path = get_abs_filepath(filename)
   3520     if not file_path:
   3521         return None
   3522     try:
   3523         f = open(file_path)
   3524         clean_lines = CleansedLines(f.readlines())
   3525     finally:
   3526         f.close()
   3527 
   3528     # Make a list of all genuine alternatives to static_cast.
   3529     matches = grep(clean_lines, pattern, error)
   3530     return matches
   3531 
   3532 
   3533 def check_for_object_static_cast(processing_file, line_number, line, error):
   3534     """Checks for a Cpp-style static cast on objects by looking for the pattern.
   3535 
   3536     Args:
   3537       processing_file: The name of the processing file.
   3538       line_number: The number of the line to check.
   3539       line: The line of code to check.
   3540       error: The function to call with any errors found.
   3541     """
   3542     matched = search(r'\bstatic_cast<(\s*\w*:?:?\w+\s*\*+\s*)>', line)
   3543     if not matched:
   3544         return
   3545 
   3546     class_name = re.sub('[\*]', '', matched.group(1))
   3547     class_name = class_name.strip()
   3548     # Ignore (for now) when the casting is to void*,
   3549     if class_name == 'void':
   3550         return
   3551 
   3552     namespace_pos = class_name.find(':')
   3553     if not namespace_pos == -1:
   3554         class_name = class_name[namespace_pos + 2:]
   3555 
   3556     header_file = ''.join((class_name, '.h'))
   3557     matches = check_for_toFoo_definition(header_file, ''.join(('to', class_name)), error)
   3558     # Ignore (for now) if not able to find the header where toFoo might be defined.
   3559     # TODO: Handle cases where Classname might be defined in some other header or cpp file.
   3560     if matches is None:
   3561         return
   3562 
   3563     report_error = True
   3564     # Ensure found static_cast instance is not from within toFoo definition itself.
   3565     if (os.path.basename(processing_file) == header_file):
   3566         for item in matches:
   3567             if line_number in range(item[1], item[2]):
   3568                 report_error = False
   3569                 break
   3570 
   3571     if report_error:
   3572         if len(matches):
   3573             # toFoo is defined - enforce using it.
   3574             # TODO: Suggest an appropriate toFoo from the alternatives present in matches.
   3575             error(line_number, 'runtime/casting', 4,
   3576                   'static_cast of class objects is not allowed. Use to%s defined in %s.' %
   3577                   (class_name, header_file))
   3578         else:
   3579             # No toFoo defined - enforce definition & usage.
   3580             # TODO: Automate the generation of toFoo() to avoid any slippages ever.
   3581             error(line_number, 'runtime/casting', 4,
   3582                   'static_cast of class objects is not allowed. Add to%s in %s and use it instead.' %
   3583                   (class_name, header_file))
   3584 
   3585 
   3586 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
   3587                        error):
   3588     """Checks for a C-style cast by looking for the pattern.
   3589 
   3590     This also handles sizeof(type) warnings, due to similarity of content.
   3591 
   3592     Args:
   3593       line_number: The number of the line to check.
   3594       line: The line of code to check.
   3595       raw_line: The raw line of code to check, with comments.
   3596       cast_type: The string for the C++ cast to recommend.  This is either
   3597                  reinterpret_cast or static_cast, depending.
   3598       pattern: The regular expression used to find C-style casts.
   3599       error: The function to call with any errors found.
   3600     """
   3601     matched = search(pattern, line)
   3602     if not matched:
   3603         return
   3604 
   3605     # e.g., sizeof(int)
   3606     sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1])
   3607     if sizeof_match:
   3608         error(line_number, 'runtime/sizeof', 1,
   3609               'Using sizeof(type).  Use sizeof(varname) instead if possible')
   3610         return
   3611 
   3612     remainder = line[matched.end(0):]
   3613 
   3614     # The close paren is for function pointers as arguments to a function.
   3615     # eg, void foo(void (*bar)(int));
   3616     # The semicolon check is a more basic function check; also possibly a
   3617     # function pointer typedef.
   3618     # eg, void foo(int); or void foo(int) const;
   3619     # The equals check is for function pointer assignment.
   3620     # eg, void *(*foo)(int) = ...
   3621     #
   3622     # Right now, this will only catch cases where there's a single argument, and
   3623     # it's unnamed.  It should probably be expanded to check for multiple
   3624     # arguments with some unnamed.
   3625     function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder)
   3626     if function_match:
   3627         if (not function_match.group(3)
   3628             or function_match.group(3) == ';'
   3629             or raw_line.find('/*') < 0):
   3630             error(line_number, 'readability/function', 3,
   3631                   'All parameters should be named in a function')
   3632         return
   3633 
   3634     # At this point, all that should be left is actual casts.
   3635     error(line_number, 'readability/casting', 4,
   3636           'Using C-style cast.  Use %s<%s>(...) instead' %
   3637           (cast_type, matched.group(1)))
   3638 
   3639 
# Lookup tables and precompiled regexes used by the include-what-you-use
# check (check_for_include_what_you_use).

# Pairs of (header, names of the templates that header provides). Each
# template name is turned into a regex in _re_pattern_templates below.
_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

# Maps a template entity (in the 'name<>' form produced below) to header names
# (without angle brackets) whose presence among the includes is accepted in
# place of the entity's own header.
_HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
    # We can trust with reasonable confidence that map gives us pair<>, too.
    'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
}

# Matches the bare word 'string' anywhere in a line.
_RE_PATTERN_STRING = re.compile(r'\bstring\b')

# List of (compiled regex, function name, '<algorithm>') triples, one per
# <algorithm> function that is checked for.
_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
    # type::max().
    # The leading [^>.] consumes one character, so a call that starts at the
    # very first column of the line is not matched.
    _re_pattern_algorithm_header.append(
        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
         _template,
         '<algorithm>'))

# List of (compiled regex, 'name<>', header) triples, one per template listed
# in _HEADERS_CONTAINING_TEMPLATES.
_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
    for _template in _templates:
        # The template name must be preceded by a literal '<' or a word
        # boundary and followed (after optional whitespace) by '<'.
        _re_pattern_templates.append(
            (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
             _template + '<>',
             _header))
   3701 
   3702 
   3703 def files_belong_to_same_module(filename_cpp, filename_h):
   3704     """Check if these two filenames belong to the same module.
   3705 
   3706     The concept of a 'module' here is a as follows:
   3707     foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
   3708     same 'module' if they are in the same directory.
   3709     some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   3710     to belong to the same module here.
   3711 
   3712     If the filename_cpp contains a longer path than the filename_h, for example,
   3713     '/absolute/path/to/base/sysinfo.cpp', and this file would include
   3714     'base/sysinfo.h', this function also produces the prefix needed to open the
   3715     header. This is used by the caller of this function to more robustly open the
   3716     header file. We don't have access to the real include paths in this context,
   3717     so we need this guesswork here.
   3718 
   3719     Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
   3720     according to this implementation. Because of this, this function gives
   3721     some false positives. This should be sufficiently rare in practice.
   3722 
   3723     Args:
   3724       filename_cpp: is the path for the .cpp file
   3725       filename_h: is the path for the header path
   3726 
   3727     Returns:
   3728       Tuple with a bool and a string:
   3729       bool: True if filename_cpp and filename_h belong to the same module.
   3730       string: the additional prefix needed to open the header file.
   3731     """
   3732 
   3733     if not filename_cpp.endswith('.cpp'):
   3734         return (False, '')
   3735     filename_cpp = filename_cpp[:-len('.cpp')]
   3736     if filename_cpp.endswith('_unittest'):
   3737         filename_cpp = filename_cpp[:-len('_unittest')]
   3738     elif filename_cpp.endswith('_test'):
   3739         filename_cpp = filename_cpp[:-len('_test')]
   3740     filename_cpp = filename_cpp.replace('/public/', '/')
   3741     filename_cpp = filename_cpp.replace('/internal/', '/')
   3742 
   3743     if not filename_h.endswith('.h'):
   3744         return (False, '')
   3745     filename_h = filename_h[:-len('.h')]
   3746     if filename_h.endswith('-inl'):
   3747         filename_h = filename_h[:-len('-inl')]
   3748     filename_h = filename_h.replace('/public/', '/')
   3749     filename_h = filename_h.replace('/internal/', '/')
   3750 
   3751     files_belong_to_same_module = filename_cpp.endswith(filename_h)
   3752     common_path = ''
   3753     if files_belong_to_same_module:
   3754         common_path = filename_cpp[:-len(filename_h)]
   3755     return files_belong_to_same_module, common_path
   3756 
   3757 
   3758 def update_include_state(filename, include_state):
   3759     """Fill up the include_state with new includes found from the file.
   3760 
   3761     Args:
   3762       filename: the name of the header to read.
   3763       include_state: an _IncludeState instance in which the headers are inserted.
   3764       io: The io factory to use to read the file. Provided for testability.
   3765 
   3766     Returns:
   3767       True if a header was succesfully added. False otherwise.
   3768     """
   3769     header_file = None
   3770     try:
   3771         header_file = CppChecker.fs.read_text_file(filename)
   3772     except IOError:
   3773         return False
   3774     line_number = 0
   3775     for line in header_file:
   3776         line_number += 1
   3777         clean_line = cleanse_comments(line)
   3778         matched = _RE_PATTERN_INCLUDE.search(clean_line)
   3779         if matched:
   3780             include = matched.group(2)
   3781             # The value formatting is cute, but not really used right now.
   3782             # What matters here is that the key is in include_state.
   3783             include_state.setdefault(include, '%s:%d' % (filename, line_number))
   3784     return True
   3785 
   3786 
   3787 def check_for_include_what_you_use(filename, clean_lines, include_state, error):
   3788     """Reports for missing stl includes.
   3789 
   3790     This function will output warnings to make sure you are including the headers
   3791     necessary for the stl containers and functions that you use. We only give one
   3792     reason to include a header. For example, if you use both equal_to<> and
   3793     less<> in a .h file, only one (the latter in the file) of these will be
   3794     reported as a reason to include the <functional>.
   3795 
   3796     Args:
   3797       filename: The name of the current file.
   3798       clean_lines: A CleansedLines instance containing the file.
   3799       include_state: An _IncludeState instance.
   3800       error: The function to call with any errors found.
   3801     """
   3802     required = {}  # A map of header name to line_number and the template entity.
   3803         # Example of required: { '<functional>': (1219, 'less<>') }
   3804 
   3805     for line_number in xrange(clean_lines.num_lines()):
   3806         line = clean_lines.elided[line_number]
   3807         if not line or line[0] == '#':
   3808             continue
   3809 
   3810         # String is special -- it is a non-templatized type in STL.
   3811         if _RE_PATTERN_STRING.search(line):
   3812             required['<string>'] = (line_number, 'string')
   3813 
   3814         for pattern, template, header in _re_pattern_algorithm_header:
   3815             if pattern.search(line):
   3816                 required[header] = (line_number, template)
   3817 
   3818         # The following function is just a speed up, no semantics are changed.
   3819         if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   3820             continue
   3821 
   3822         for pattern, template, header in _re_pattern_templates:
   3823             if pattern.search(line):
   3824                 required[header] = (line_number, template)
   3825 
   3826     # The policy is that if you #include something in foo.h you don't need to
   3827     # include it again in foo.cpp. Here, we will look at possible includes.
   3828     # Let's copy the include_state so it is only messed up within this function.
   3829     include_state = include_state.copy()
   3830 
   3831     # Did we find the header for this file (if any) and succesfully load it?
   3832     header_found = False
   3833 
   3834     # Use the absolute path so that matching works properly.
   3835     abs_filename = os.path.abspath(filename)
   3836 
   3837     # For Emacs's flymake.
   3838     # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
   3839     # by flymake and that file name might end with '_flymake.cpp'. In that case,
   3840     # restore original file name here so that the corresponding header file can be
   3841     # found.
   3842     # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
   3843     # instead of 'foo_flymake.h'
   3844     abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename)
   3845 
   3846     # include_state is modified during iteration, so we iterate over a copy of
   3847     # the keys.
   3848     for header in include_state.keys():  #NOLINT
   3849         (same_module, common_path) = files_belong_to_same_module(abs_filename, header)
   3850         fullpath = common_path + header
   3851         if same_module and update_include_state(fullpath, include_state):
   3852             header_found = True
   3853 
   3854     # If we can't find the header file for a .cpp, assume it's because we don't
   3855     # know where to look. In that case we'll give up as we're not sure they
   3856     # didn't include it in the .h file.
   3857     # FIXME: Do a better job of finding .h files so we are confident that
   3858     #        not having the .h file means there isn't one.
   3859     if filename.endswith('.cpp') and not header_found:
   3860         return
   3861 
   3862     # All the lines have been processed, report the errors found.
   3863     for required_header_unstripped in required:
   3864         template = required[required_header_unstripped][1]
   3865         if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
   3866             headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
   3867             if [True for header in headers if header in include_state]:
   3868                 continue
   3869         if required_header_unstripped.strip('<>"') not in include_state:
   3870             error(required[required_header_unstripped][0],
   3871                   'build/include_what_you_use', 4,
   3872                   'Add #include ' + required_header_unstripped + ' for ' + template)
   3873 
   3874 
   3875 def process_line(filename, file_extension,
   3876                  clean_lines, line, include_state, function_state,
   3877                  class_state, file_state, enum_state, error):
   3878     """Processes a single line in the file.
   3879 
   3880     Args:
   3881       filename: Filename of the file that is being processed.
   3882       file_extension: The extension (dot not included) of the file.
   3883       clean_lines: An array of strings, each representing a line of the file,
   3884                    with comments stripped.
   3885       line: Number of line being processed.
   3886       include_state: An _IncludeState instance in which the headers are inserted.
   3887       function_state: A _FunctionState instance which counts function lines, etc.
   3888       class_state: A _ClassState instance which maintains information about
   3889                    the current stack of nested class declarations being parsed.
   3890       file_state: A _FileState instance which maintains information about
   3891                   the state of things in the file.
   3892       enum_state: A _EnumState instance which maintains an enum declaration
   3893                   state.
   3894       error: A callable to which errors are reported, which takes arguments:
   3895              line number, error level, and message
   3896 
   3897     """
   3898     raw_lines = clean_lines.raw_lines
   3899     detect_functions(clean_lines, line, function_state, error)
   3900     check_for_function_lengths(clean_lines, line, function_state, error)
   3901     if search(r'\bNOLINT\b', raw_lines[line]):  # ignore nolint lines
   3902         return
   3903     if match(r'\s*\b__asm\b', raw_lines[line]):  # Ignore asm lines as they format differently.
   3904         return
   3905     check_function_definition(filename, file_extension, clean_lines, line, function_state, error)
   3906     check_pass_ptr_usage(clean_lines, line, function_state, error)
   3907     check_for_leaky_patterns(clean_lines, line, function_state, error)
   3908     check_for_multiline_comments_and_strings(clean_lines, line, error)
   3909     check_style(clean_lines, line, file_extension, class_state, file_state, enum_state, error)
   3910     check_language(filename, clean_lines, line, file_extension, include_state,
   3911                    file_state, error)
   3912     check_for_non_standard_constructs(clean_lines, line, class_state, error)
   3913     check_posix_threading(clean_lines, line, error)
   3914     check_invalid_increment(clean_lines, line, error)
   3915     check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line, error)
   3916 
   3917 def _process_lines(filename, file_extension, lines, error, min_confidence):
   3918     """Performs lint checks and reports any errors to the given error function.
   3919 
   3920     Args:
   3921       filename: Filename of the file that is being processed.
   3922       file_extension: The extension (dot not included) of the file.
   3923       lines: An array of strings, each representing a line of the file, with the
   3924              last element being empty if the file is termined with a newline.
   3925       error: A callable to which errors are reported, which takes 4 arguments:
   3926     """
   3927     lines = (['// marker so line numbers and indices both start at 1'] + lines +
   3928              ['// marker so line numbers end in a known way'])
   3929 
   3930     include_state = _IncludeState()
   3931     function_state = _FunctionState(min_confidence)
   3932     class_state = _ClassState()
   3933 
   3934     check_for_copyright(lines, error)
   3935 
   3936     if file_extension == 'h':
   3937         check_for_header_guard(filename, lines, error)
   3938 
   3939     remove_multi_line_comments(lines, error)
   3940     clean_lines = CleansedLines(lines)
   3941     file_state = _FileState(clean_lines, file_extension)
   3942     enum_state = _EnumState()
   3943     for line in xrange(clean_lines.num_lines()):
   3944         process_line(filename, file_extension, clean_lines, line,
   3945                      include_state, function_state, class_state, file_state,
   3946                      enum_state, error)
   3947     class_state.check_finished(error)
   3948 
   3949     check_for_include_what_you_use(filename, clean_lines, include_state, error)
   3950 
   3951     # We check here rather than inside process_line so that we see raw
   3952     # lines rather than "cleaned" lines.
   3953     check_for_unicode_replacement_characters(lines, error)
   3954 
   3955     check_for_new_line_at_eof(lines, error)
   3956 
   3957 
   3958 class CppChecker(object):
   3959 
   3960     """Processes C++ lines for checking style."""
   3961 
   3962     # This list is used to--
   3963     #
   3964     # (1) generate an explicit list of all possible categories,
   3965     # (2) unit test that all checked categories have valid names, and
   3966     # (3) unit test that all categories are getting unit tested.
   3967     #
   3968     categories = set([
   3969         'build/class',
   3970         'build/deprecated',
   3971         'build/endif_comment',
   3972         'build/forward_decl',
   3973         'build/header_guard',
   3974         'build/include',
   3975         'build/include_order',
   3976         'build/include_what_you_use',
   3977         'build/namespaces',
   3978         'build/printf_format',
   3979         'build/storage_class',
   3980         'build/using_std',
   3981         'legal/copyright',
   3982         'readability/braces',
   3983         'readability/casting',
   3984         'readability/check',
   3985         'readability/comparison_to_boolean',
   3986         'readability/constructors',
   3987         'readability/control_flow',
   3988         'readability/enum_casing',
   3989         'readability/fn_size',
   3990         'readability/function',
   3991         'readability/multiline_comment',
   3992         'readability/multiline_string',
   3993         'readability/parameter_name',
   3994         'readability/naming',
   3995         'readability/naming/underscores',
   3996         'readability/null',
   3997         'readability/pass_ptr',
   3998         'readability/streams',
   3999         'readability/todo',
   4000         'readability/utf8',
   4001         'readability/webkit_export',
   4002         'runtime/arrays',
   4003         'runtime/bitfields',
   4004         'runtime/casting',
   4005         'runtime/ctype_function',
   4006         'runtime/explicit',
   4007         'runtime/init',
   4008         'runtime/int',
   4009         'runtime/invalid_increment',
   4010         'runtime/leaky_pattern',
   4011         'runtime/max_min_macros',
   4012         'runtime/memset',
   4013         'runtime/printf',
   4014         'runtime/printf_format',
   4015         'runtime/references',
   4016         'runtime/rtti',
   4017         'runtime/sizeof',
   4018         'runtime/string',
   4019         'runtime/threadsafe_fn',
   4020         'runtime/unsigned',
   4021         'runtime/virtual',
   4022         'whitespace/blank_line',
   4023         'whitespace/braces',
   4024         'whitespace/comma',
   4025         'whitespace/comments',
   4026         'whitespace/declaration',
   4027         'whitespace/end_of_line',
   4028         'whitespace/ending_newline',
   4029         'whitespace/indent',
   4030         'whitespace/line_length',
   4031         'whitespace/newline',
   4032         'whitespace/operators',
   4033         'whitespace/parens',
   4034         'whitespace/semicolon',
   4035         'whitespace/tab',
   4036         'whitespace/todo',
   4037         ])
   4038 
   4039     fs = None
   4040 
   4041     def __init__(self, file_path, file_extension, handle_style_error,
   4042                  min_confidence, fs=None):
   4043         """Create a CppChecker instance.
   4044 
   4045         Args:
   4046           file_extension: A string that is the file extension, without
   4047                           the leading dot.
   4048 
   4049         """
   4050         self.file_extension = file_extension
   4051         self.file_path = file_path
   4052         self.handle_style_error = handle_style_error
   4053         self.min_confidence = min_confidence
   4054         CppChecker.fs = fs or FileSystem()
   4055 
   4056     # Useful for unit testing.
   4057     def __eq__(self, other):
   4058         """Return whether this CppChecker instance is equal to another."""
   4059         if self.file_extension != other.file_extension:
   4060             return False
   4061         if self.file_path != other.file_path:
   4062             return False
   4063         if self.handle_style_error != other.handle_style_error:
   4064             return False
   4065         if self.min_confidence != other.min_confidence:
   4066             return False
   4067 
   4068         return True
   4069 
   4070     # Useful for unit testing.
   4071     def __ne__(self, other):
   4072         # Python does not automatically deduce __ne__() from __eq__().
   4073         return not self.__eq__(other)
   4074 
   4075     def check(self, lines):
   4076         _process_lines(self.file_path, self.file_extension, lines,
   4077                        self.handle_style_error, self.min_confidence)
   4078 
   4079 
   4080 # FIXME: Remove this function (requires refactoring unit tests).
   4081 def process_file_data(filename, file_extension, lines, error, min_confidence, fs=None):
   4082     checker = CppChecker(filename, file_extension, error, min_confidence, fs)
   4083     checker.check(lines)
   4084