Home | History | Annotate | Download | only in checkers
      1 #!/usr/bin/python
      2 # -*- coding: utf-8 -*-
      3 #
      4 # Copyright (C) 2009, 2010 Google Inc. All rights reserved.
      5 # Copyright (C) 2009 Torch Mobile Inc.
      6 # Copyright (C) 2009 Apple Inc. All rights reserved.
      7 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions are
     11 # met:
     12 #
     13 #    * Redistributions of source code must retain the above copyright
     14 # notice, this list of conditions and the following disclaimer.
     15 #    * Redistributions in binary form must reproduce the above
     16 # copyright notice, this list of conditions and the following disclaimer
     17 # in the documentation and/or other materials provided with the
     18 # distribution.
     19 #    * Neither the name of Google Inc. nor the names of its
     20 # contributors may be used to endorse or promote products derived from
     21 # this software without specific prior written permission.
     22 #
     23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     27 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     34 
# This is a modified version of Google's cpplint. The original code is at
# http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
     37 
     38 """Support for check-webkit-style."""
     39 
     40 import codecs
     41 import math  # for log
     42 import os
     43 import os.path
     44 import re
     45 import sre_compile
     46 import string
     47 import sys
     48 import unicodedata
     49 
     50 from webkitpy.common.memoized import memoized
     51 
# Key under which a unit test can supply a class used to fake the loading of
# a header file (see the comment on _unit_test_config for how it is used).
INCLUDE_IO_INJECTION_KEY = 'include_header_io'
     54 
     55 # Headers that we consider STL headers.
     56 _STL_HEADERS = frozenset([
     57     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
     58     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
     59     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
     60     'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
     61     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
     62     'utility', 'vector', 'vector.h',
     63     ])
     64 
     65 
     66 # Non-STL C++ system headers.
     67 _CPP_HEADERS = frozenset([
     68     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
     69     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
     70     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
     71     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
     72     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
     73     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
     74     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
     75     'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
     76     'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
     77     'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
     78     'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
     79     'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
     80     'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
     81     ])
     82 
     83 
     84 # Assertion macros.  These are defined in base/logging.h and
     85 # testing/base/gunit.h.  Note that the _M versions need to come first
     86 # for substring matching to work.
     87 _CHECK_MACROS = [
     88     'DCHECK', 'CHECK',
     89     'EXPECT_TRUE_M', 'EXPECT_TRUE',
     90     'ASSERT_TRUE_M', 'ASSERT_TRUE',
     91     'EXPECT_FALSE_M', 'EXPECT_FALSE',
     92     'ASSERT_FALSE_M', 'ASSERT_FALSE',
     93     ]
     94 
     95 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
     96 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
     97 
     98 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
     99                         ('>=', 'GE'), ('>', 'GT'),
    100                         ('<=', 'LE'), ('<', 'LT')]:
    101     _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    102     _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    103     _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    104     _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    105     _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    106     _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    107 
    108 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    109                             ('>=', 'LT'), ('>', 'LE'),
    110                             ('<=', 'GT'), ('<', 'GE')]:
    111     _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    112     _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    113     _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    114     _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    115 
    116 
# These constants define types of headers for use with
# _IncludeState.check_next_include_order().  The trailing notes mirror the
# human-readable names in _IncludeState._TYPE_NAMES.
_CONFIG_HEADER = 0  # WebCore config.h
_PRIMARY_HEADER = 1  # The header this file implements.
_OTHER_HEADER = 2  # Any other header.
_MOC_HEADER = 3  # A moc file.
    123 
    124 
# A dictionary of items that customize behavior for unit tests. For example,
# INCLUDE_IO_INJECTION_KEY allows providing a custom io class which allows
# for faking a header file.
_unit_test_config = {}
    129 
    130 
# Maps a regexp pattern string to its compiled pattern object; it is filled
# in lazily by match(), search(), sub() and subn() below.
#
# The regexp compilation caching is inlined in all regexp functions for
# performance reasons; factoring it out into a separate function turns out
# to be noticeably expensive.
_regexp_compile_cache = {}
    135 
    136 
    137 def match(pattern, s):
    138     """Matches the string with the pattern, caching the compiled regexp."""
    139     if not pattern in _regexp_compile_cache:
    140         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    141     return _regexp_compile_cache[pattern].match(s)
    142 
    143 
    144 def search(pattern, s):
    145     """Searches the string for the pattern, caching the compiled regexp."""
    146     if not pattern in _regexp_compile_cache:
    147         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    148     return _regexp_compile_cache[pattern].search(s)
    149 
    150 
    151 def sub(pattern, replacement, s):
    152     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    153     if not pattern in _regexp_compile_cache:
    154         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    155     return _regexp_compile_cache[pattern].sub(replacement, s)
    156 
    157 
    158 def subn(pattern, replacement, s):
    159     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    160     if not pattern in _regexp_compile_cache:
    161         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    162     return _regexp_compile_cache[pattern].subn(replacement, s)
    163 
    164 
    165 def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    166     """Returns the string with replacement done.
    167 
    168     Every character in the match is replaced with char.
    169     Due to the iterative nature, pattern should not match char or
    170     there will be an infinite loop.
    171 
    172     Example:
    173       pattern = r'<[^>]>' # template parameters
    174       char_replacement =  '_'
    175       s =     'A<B<C, D>>'
    176       Returns 'A_________'
    177 
    178     Args:
    179       pattern: The regex to match.
    180       char_replacement: The character to put in place of every
    181                         character of the match.
    182       s: The string on which to do the replacements.
    183 
    184     Returns:
    185       True, if the given line is blank.
    186     """
    187     while True:
    188         matched = search(pattern, s)
    189         if not matched:
    190             return s
    191         start_match_index = matched.start(0)
    192         end_match_index = matched.end(0)
    193         match_length = end_match_index - start_match_index
    194         s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:]
    195 
    196 
    197 def _rfind_in_lines(regex, lines, start_position, not_found_position):
    198     """Does a reverse find starting at start position and going backwards until
    199     a match is found.
    200 
    201     Returns the position where the regex ended.
    202     """
    203     # Put the regex in a group and proceed it with a greedy expression that
    204     # matches anything to ensure that we get the last possible match in a line.
    205     last_in_line_regex = r'.*(' + regex + ')'
    206     current_row = start_position.row
    207 
    208     # Start with the given row and trim off everything past what may be matched.
    209     current_line = lines[start_position.row][:start_position.column]
    210     while True:
    211         found_match = match(last_in_line_regex, current_line)
    212         if found_match:
    213             return Position(current_row, found_match.end(1))
    214 
    215         # A match was not found so continue backward.
    216         current_row -= 1
    217         if current_row < 0:
    218             return not_found_position
    219         current_line = lines[current_row]
    220 
    221 
    222 def _convert_to_lower_with_underscores(text):
    223     """Converts all text strings in camelCase or PascalCase to lowers with underscores."""
    224 
    225     # First add underscores before any capital letter followed by a lower case letter
    226     # as long as it is in a word.
    227     # (This put an underscore before Password but not P and A in WPAPassword).
    228     text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text)
    229 
    230     # Next add underscores before capitals at the end of words if it was
    231     # preceeded by lower case letter or number.
    232     # (This puts an underscore before A in isA but not A in CBA).
    233     text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text)
    234 
    235     # Next add underscores when you have a captial letter which is followed by a capital letter
    236     # but is not proceeded by one. (This puts an underscore before A in 'WordADay').
    237     text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text)
    238 
    239     return text.lower()
    240 
    241 
    242 
    243 def _create_acronym(text):
    244     """Creates an acronym for the given text."""
    245     # Removes all lower case letters except those starting words.
    246     text = sub(r'(?<!\b)[a-z]', '', text)
    247     return text.upper()
    248 
    249 
    250 def up_to_unmatched_closing_paren(s):
    251     """Splits a string into two parts up to first unmatched ')'.
    252 
    253     Args:
    254       s: a string which is a substring of line after '('
    255       (e.g., "a == (b + c))").
    256 
    257     Returns:
    258       A pair of strings (prefix before first unmatched ')',
    259       remainder of s after first unmatched ')'), e.g.,
    260       up_to_unmatched_closing_paren("a == (b + c)) { ")
    261       returns "a == (b + c)", " {".
    262       Returns None, None if there is no unmatched ')'
    263 
    264     """
    265     i = 1
    266     for pos, c in enumerate(s):
    267       if c == '(':
    268         i += 1
    269       elif c == ')':
    270         i -= 1
    271         if i == 0:
    272           return s[:pos], s[pos + 1:]
    273     return None, None
    274 
    275 class _IncludeState(dict):
    276     """Tracks line numbers for includes, and the order in which includes appear.
    277 
    278     As a dict, an _IncludeState object serves as a mapping between include
    279     filename and line number on which that file was included.
    280 
    281     Call check_next_include_order() once for each header in the file, passing
    282     in the type constants defined above. Calls in an illegal order will
    283     raise an _IncludeError with an appropriate error message.
    284 
    285     """
    286     # self._section will move monotonically through this set. If it ever
    287     # needs to move backwards, check_next_include_order will raise an error.
    288     _INITIAL_SECTION = 0
    289     _CONFIG_SECTION = 1
    290     _PRIMARY_SECTION = 2
    291     _OTHER_SECTION = 3
    292 
    293     _TYPE_NAMES = {
    294         _CONFIG_HEADER: 'WebCore config.h',
    295         _PRIMARY_HEADER: 'header this file implements',
    296         _OTHER_HEADER: 'other header',
    297         _MOC_HEADER: 'moc file',
    298         }
    299     _SECTION_NAMES = {
    300         _INITIAL_SECTION: "... nothing.",
    301         _CONFIG_SECTION: "WebCore config.h.",
    302         _PRIMARY_SECTION: 'a header this file implements.',
    303         _OTHER_SECTION: 'other header.',
    304         }
    305 
    306     def __init__(self):
    307         dict.__init__(self)
    308         self._section = self._INITIAL_SECTION
    309         self._visited_primary_section = False
    310         self.header_types = dict();
    311 
    312     def visited_primary_section(self):
    313         return self._visited_primary_section
    314 
    315     def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
    316         """Returns a non-empty error message if the next header is out of order.
    317 
    318         This function also updates the internal state to be ready to check
    319         the next include.
    320 
    321         Args:
    322           header_type: One of the _XXX_HEADER constants defined above.
    323           file_is_header: Whether the file that owns this _IncludeState is itself a header
    324 
    325         Returns:
    326           The empty string if the header is in the right order, or an
    327           error message describing what's wrong.
    328 
    329         """
    330         if header_type == _CONFIG_HEADER and file_is_header:
    331             return 'Header file should not contain WebCore config.h.'
    332         if header_type == _PRIMARY_HEADER and file_is_header:
    333             return 'Header file should not contain itself.'
    334         if header_type == _MOC_HEADER:
    335             return ''
    336 
    337         error_message = ''
    338         if self._section != self._OTHER_SECTION:
    339             before_error_message = ('Found %s before %s' %
    340                                     (self._TYPE_NAMES[header_type],
    341                                      self._SECTION_NAMES[self._section + 1]))
    342         after_error_message = ('Found %s after %s' %
    343                                 (self._TYPE_NAMES[header_type],
    344                                  self._SECTION_NAMES[self._section]))
    345 
    346         if header_type == _CONFIG_HEADER:
    347             if self._section >= self._CONFIG_SECTION:
    348                 error_message = after_error_message
    349             self._section = self._CONFIG_SECTION
    350         elif header_type == _PRIMARY_HEADER:
    351             if self._section >= self._PRIMARY_SECTION:
    352                 error_message = after_error_message
    353             elif self._section < self._CONFIG_SECTION:
    354                 error_message = before_error_message
    355             self._section = self._PRIMARY_SECTION
    356             self._visited_primary_section = True
    357         else:
    358             assert header_type == _OTHER_HEADER
    359             if not file_is_header and self._section < self._PRIMARY_SECTION:
    360                 if primary_header_exists:
    361                     error_message = before_error_message
    362             self._section = self._OTHER_SECTION
    363 
    364         return error_message
    365 
    366 
    367 class Position(object):
    368     """Holds the position of something."""
    369     def __init__(self, row, column):
    370         self.row = row
    371         self.column = column
    372 
    373     def __str__(self):
    374         return '(%s, %s)' % (self.row, self.column)
    375 
    376     def __cmp__(self, other):
    377         return self.row.__cmp__(other.row) or self.column.__cmp__(other.column)
    378 
    379 
    380 class Parameter(object):
    381     """Information about one function parameter."""
    382     def __init__(self, parameter, parameter_name_index, row):
    383         self.type = parameter[:parameter_name_index].strip()
    384         # Remove any initializers from the parameter name (e.g. int i = 5).
    385         self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip()
    386         self.row = row
    387 
    388     @memoized
    389     def lower_with_underscores_name(self):
    390         """Returns the parameter name in the lower with underscores format."""
    391         return _convert_to_lower_with_underscores(self.name)
    392 
    393 
    394 class SingleLineView(object):
    395     """Converts multiple lines into a single line (with line breaks replaced by a
    396        space) to allow for easier searching."""
    397     def __init__(self, lines, start_position, end_position):
    398         """Create a SingleLineView instance.
    399 
    400         Args:
    401           lines: a list of multiple lines to combine into a single line.
    402           start_position: offset within lines of where to start the single line.
    403           end_position: just after where to end (like a slice operation).
    404         """
    405         # Get the rows of interest.
    406         trimmed_lines = lines[start_position.row:end_position.row + 1]
    407 
    408         # Remove the columns on the last line that aren't included.
    409         trimmed_lines[-1] = trimmed_lines[-1][:end_position.column]
    410 
    411         # Remove the columns on the first line that aren't included.
    412         trimmed_lines[0] = trimmed_lines[0][start_position.column:]
    413 
    414         # Create a single line with all of the parameters.
    415         self.single_line = ' '.join(trimmed_lines)
    416 
    417         # Keep the row lengths, so we can calculate the original row number
    418         # given a column in the single line (adding 1 due to the space added
    419         # during the join).
    420         self._row_lengths = [len(line) + 1 for line in trimmed_lines]
    421         self._starting_row = start_position.row
    422 
    423     def convert_column_to_row(self, single_line_column_number):
    424         """Convert the column number from the single line into the original
    425         line number.
    426 
    427         Special cases:
    428         * Columns in the added spaces are considered part of the previous line.
    429         * Columns beyond the end of the line are consider part the last line
    430         in the view."""
    431         total_columns = 0
    432         row_offset = 0
    433         while row_offset < len(self._row_lengths) - 1 and single_line_column_number >= total_columns + self._row_lengths[row_offset]:
    434             total_columns += self._row_lengths[row_offset]
    435             row_offset += 1
    436         return self._starting_row + row_offset
    437 
    438 
    439 def create_skeleton_parameters(all_parameters):
    440     """Converts a parameter list to a skeleton version.
    441 
    442     The skeleton only has one word for the parameter name, one word for the type,
    443     and commas after each parameter and only there. Everything in the skeleton
    444     remains in the same columns as the original."""
    445     all_simplifications = (
    446         # Remove template parameters, function declaration parameters, etc.
    447         r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
    448         # Remove all initializers.
    449         r'=[^,]*',
    450         # Remove :: and everything before it.
    451         r'[^,]*::',
    452         # Remove modifiers like &, *.
    453         r'[&*]',
    454         # Remove const modifiers.
    455         r'\bconst\s+(?=[A-Za-z])',
    456         # Remove numerical modifiers like long.
    457         r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')
    458 
    459     skeleton_parameters = all_parameters
    460     for simplification in all_simplifications:
    461         skeleton_parameters = iteratively_replace_matches_with_char(simplification, ' ', skeleton_parameters)
    462     # If there are any parameters, then add a , after the last one to
    463     # make a regular pattern of a , following every parameter.
    464     if skeleton_parameters.strip():
    465         skeleton_parameters += ','
    466     return skeleton_parameters
    467 
    468 
    469 def find_parameter_name_index(skeleton_parameter):
    470     """Determines where the parametere name starts given the skeleton parameter."""
    471     # The first space from the right in the simplified parameter is where the parameter
    472     # name starts unless the first space is before any content in the simplified parameter.
    473     before_name_index = skeleton_parameter.rstrip().rfind(' ')
    474     if before_name_index != -1 and skeleton_parameter[:before_name_index].strip():
    475         return before_name_index + 1
    476     return len(skeleton_parameter)
    477 
    478 
    479 def parameter_list(elided_lines, start_position, end_position):
    480     """Generator for a function's parameters."""
    481     # Create new positions that omit the outer parenthesis of the parameters.
    482     start_position = Position(row=start_position.row, column=start_position.column + 1)
    483     end_position = Position(row=end_position.row, column=end_position.column - 1)
    484     single_line_view = SingleLineView(elided_lines, start_position, end_position)
    485     skeleton_parameters = create_skeleton_parameters(single_line_view.single_line)
    486     end_index = -1
    487 
    488     while True:
    489         # Find the end of the next parameter.
    490         start_index = end_index + 1
    491         end_index = skeleton_parameters.find(',', start_index)
    492 
    493         # No comma means that all parameters have been parsed.
    494         if end_index == -1:
    495             return
    496         row = single_line_view.convert_column_to_row(end_index)
    497 
    498         # Parse the parameter into a type and parameter name.
    499         skeleton_parameter = skeleton_parameters[start_index:end_index]
    500         name_offset = find_parameter_name_index(skeleton_parameter)
    501         parameter = single_line_view.single_line[start_index:end_index]
    502         yield Parameter(parameter, name_offset, row)
    503 
    504 
    505 class _FunctionState(object):
    506     """Tracks current function name and the number of lines in its body.
    507 
    508     Attributes:
    509       min_confidence: The minimum confidence level to use while checking style.
    510 
    511     """
    512 
    513     _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    514     _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    515 
    516     def __init__(self, min_confidence):
    517         self.min_confidence = min_confidence
    518         self.current_function = ''
    519         self.in_a_function = False
    520         self.lines_in_function = 0
    521         # Make sure these will not be mistaken for real positions (even when a
    522         # small amount is added to them).
    523         self.body_start_position = Position(-1000, 0)
    524         self.end_position = Position(-1000, 0)
    525 
    526     def begin(self, function_name, function_name_start_position, body_start_position, end_position,
    527               parameter_start_position, parameter_end_position, clean_lines):
    528         """Start analyzing function body.
    529 
    530         Args:
    531             function_name: The name of the function being tracked.
    532             function_name_start_position: Position in elided where the function name starts.
    533             body_start_position: Position in elided of the { or the ; for a prototype.
    534             end_position: Position in elided just after the final } (or ; is.
    535             parameter_start_position: Position in elided of the '(' for the parameters.
    536             parameter_end_position: Position in elided just after the ')' for the parameters.
    537             clean_lines: A CleansedLines instance containing the file.
    538         """
    539         self.in_a_function = True
    540         self.lines_in_function = -1  # Don't count the open brace line.
    541         self.current_function = function_name
    542         self.function_name_start_position = function_name_start_position
    543         self.body_start_position = body_start_position
    544         self.end_position = end_position
    545         self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
    546         self.parameter_start_position = parameter_start_position
    547         self.parameter_end_position = parameter_end_position
    548         self.is_pure = False
    549         if self.is_declaration:
    550             characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
    551             self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters))
    552         self._clean_lines = clean_lines
    553         self._parameter_list = None
    554 
    555     def modifiers_and_return_type(self):
    556         """Returns the modifiers and the return type."""
    557         # Go backwards from where the function name is until we encounter one of several things:
    558         #   ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0)
    559         elided = self._clean_lines.elided
    560         start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
    561                                           elided, self.parameter_start_position, Position(0, 0))
    562         return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip()
    563 
    564     def parameter_list(self):
    565         if not self._parameter_list:
    566             # Store the final result as a tuple since that is immutable.
    567             self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position))
    568 
    569         return self._parameter_list
    570 
    571     def count(self, line_number):
    572         """Count line in current function body."""
    573         if self.in_a_function and line_number >= self.body_start_position.row:
    574             self.lines_in_function += 1
    575 
    576     def check(self, error, line_number):
    577         """Report if too many lines in function body.
    578 
    579         Args:
    580           error: The function to call with any errors found.
    581           line_number: The number of the line to check.
    582         """
    583         if match(r'T(EST|est)', self.current_function):
    584             base_trigger = self._TEST_TRIGGER
    585         else:
    586             base_trigger = self._NORMAL_TRIGGER
    587         trigger = base_trigger * 2 ** self.min_confidence
    588 
    589         if self.lines_in_function > trigger:
    590             error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    591             # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    592             if error_level > 5:
    593                 error_level = 5
    594             error(line_number, 'readability/fn_size', error_level,
    595                   'Small and focused functions are preferred:'
    596                   ' %s has %d non-comment lines'
    597                   ' (error triggered by exceeding %d lines).'  % (
    598                       self.current_function, self.lines_in_function, trigger))
    599 
    600     def end(self):
    601         """Stop analyzing function body."""
    602         self.in_a_function = False
    603 
    604 
    605 class _IncludeError(Exception):
    606     """Indicates a problem with the include order in a file."""
    607     pass
    608 
    609 
    610 class FileInfo:
    611     """Provides utility functions for filenames.
    612 
    613     FileInfo provides easy access to the components of a file's path
    614     relative to the project root.
    615     """
    616 
    617     def __init__(self, filename):
    618         self._filename = filename
    619 
    620     def full_name(self):
    621         """Make Windows paths like Unix."""
    622         return os.path.abspath(self._filename).replace('\\', '/')
    623 
    624     def repository_name(self):
    625         """Full name after removing the local path to the repository.
    626 
    627         If we have a real absolute path name here we can try to do something smart:
    628         detecting the root of the checkout and truncating /path/to/checkout from
    629         the name so that we get header guards that don't include things like
    630         "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    631         people on different computers who have checked the source out to different
    632         locations won't see bogus errors.
    633         """
    634         fullname = self.full_name()
    635 
    636         if os.path.exists(fullname):
    637             project_dir = os.path.dirname(fullname)
    638 
    639             if os.path.exists(os.path.join(project_dir, ".svn")):
    640                 # If there's a .svn file in the current directory, we
    641                 # recursively look up the directory tree for the top
    642                 # of the SVN checkout
    643                 root_dir = project_dir
    644                 one_up_dir = os.path.dirname(root_dir)
    645                 while os.path.exists(os.path.join(one_up_dir, ".svn")):
    646                     root_dir = os.path.dirname(root_dir)
    647                     one_up_dir = os.path.dirname(one_up_dir)
    648 
    649                 prefix = os.path.commonprefix([root_dir, project_dir])
    650                 return fullname[len(prefix) + 1:]
    651 
    652             # Not SVN? Try to find a git top level directory by
    653             # searching up from the current path.
    654             root_dir = os.path.dirname(fullname)
    655             while (root_dir != os.path.dirname(root_dir)
    656                    and not os.path.exists(os.path.join(root_dir, ".git"))):
    657                 root_dir = os.path.dirname(root_dir)
    658                 if os.path.exists(os.path.join(root_dir, ".git")):
    659                     prefix = os.path.commonprefix([root_dir, project_dir])
    660                     return fullname[len(prefix) + 1:]
    661 
    662         # Don't know what to do; header guard warnings may be wrong...
    663         return fullname
    664 
    665     def split(self):
    666         """Splits the file into the directory, basename, and extension.
    667 
    668         For 'chrome/browser/browser.cpp', Split() would
    669         return ('chrome/browser', 'browser', '.cpp')
    670 
    671         Returns:
    672           A tuple of (directory, basename, extension).
    673         """
    674 
    675         googlename = self.repository_name()
    676         project, rest = os.path.split(googlename)
    677         return (project,) + os.path.splitext(rest)
    678 
    679     def base_name(self):
    680         """File base name - text after the final slash, before the final period."""
    681         return self.split()[1]
    682 
    683     def extension(self):
    684         """File extension - text following the final period."""
    685         return self.split()[2]
    686 
    687     def no_extension(self):
    688         """File has no source file extension."""
    689         return '/'.join(self.split()[0:2])
    690 
    691     def is_source(self):
    692         """File has a source file extension."""
    693         return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    694 
    695 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to decide which surrounding spaces to remove, so that comments
# inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    716 
    717 
    718 def is_cpp_string(line):
    719     """Does line terminate so, that the next symbol is in string constant.
    720 
    721     This function does not consider single-line nor multi-line comments.
    722 
    723     Args:
    724       line: is a partial line of code starting from the 0..n.
    725 
    726     Returns:
    727       True, if next character appended to 'line' is inside a
    728       string constant.
    729     """
    730 
    731     line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    732     return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    733 
    734 
    735 def find_next_multi_line_comment_start(lines, line_index):
    736     """Find the beginning marker for a multiline comment."""
    737     while line_index < len(lines):
    738         if lines[line_index].strip().startswith('/*'):
    739             # Only return this marker if the comment goes beyond this line
    740             if lines[line_index].strip().find('*/', 2) < 0:
    741                 return line_index
    742         line_index += 1
    743     return len(lines)
    744 
    745 
    746 def find_next_multi_line_comment_end(lines, line_index):
    747     """We are inside a comment, find the end marker."""
    748     while line_index < len(lines):
    749         if lines[line_index].strip().endswith('*/'):
    750             return line_index
    751         line_index += 1
    752     return len(lines)
    753 
    754 
    755 def remove_multi_line_comments_from_range(lines, begin, end):
    756     """Clears a range of lines for multi-line comments."""
    757     # Having // dummy comments makes the lines non-empty, so we will not get
    758     # unnecessary blank line warnings later in the code.
    759     for i in range(begin, end):
    760         lines[i] = '// dummy'
    761 
    762 
    763 def remove_multi_line_comments(lines, error):
    764     """Removes multiline (c-style) comments from lines."""
    765     line_index = 0
    766     while line_index < len(lines):
    767         line_index_begin = find_next_multi_line_comment_start(lines, line_index)
    768         if line_index_begin >= len(lines):
    769             return
    770         line_index_end = find_next_multi_line_comment_end(lines, line_index_begin)
    771         if line_index_end >= len(lines):
    772             error(line_index_begin + 1, 'readability/multiline_comment', 5,
    773                   'Could not find end of multi-line comment')
    774             return
    775         remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1)
    776         line_index = line_index_end + 1
    777 
    778 
    779 def cleanse_comments(line):
    780     """Removes //-comments and single-line C-style /* */ comments.
    781 
    782     Args:
    783       line: A line of C++ source.
    784 
    785     Returns:
    786       The line with single-line comments removed.
    787     """
    788     comment_position = line.find('//')
    789     if comment_position != -1 and not is_cpp_string(line[:comment_position]):
    790         line = line[:comment_position]
    791     # get rid of /* ... */
    792     return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
    793 
    794 
    795 class CleansedLines(object):
    796     """Holds 3 copies of all lines with different preprocessing applied to them.
    797 
    798     1) elided member contains lines without strings and comments,
    799     2) lines member contains lines without comments, and
    800     3) raw member contains all the lines without processing.
    801     All these three members are of <type 'list'>, and of the same length.
    802     """
    803 
    804     def __init__(self, lines):
    805         self.elided = []
    806         self.lines = []
    807         self.raw_lines = lines
    808         self._num_lines = len(lines)
    809         for line_number in range(len(lines)):
    810             self.lines.append(cleanse_comments(lines[line_number]))
    811             elided = self.collapse_strings(lines[line_number])
    812             self.elided.append(cleanse_comments(elided))
    813 
    814     def num_lines(self):
    815         """Returns the number of lines represented."""
    816         return self._num_lines
    817 
    818     @staticmethod
    819     def collapse_strings(elided):
    820         """Collapses strings and chars on a line to simple "" or '' blocks.
    821 
    822         We nix strings first so we're not fooled by text like '"http://"'
    823 
    824         Args:
    825           elided: The line being processed.
    826 
    827         Returns:
    828           The line with collapsed strings.
    829         """
    830         if not _RE_PATTERN_INCLUDE.match(elided):
    831             # Remove escaped characters first to make quote/single quote collapsing
    832             # basic.  Things that look like escaped characters shouldn't occur
    833             # outside of strings and chars.
    834             elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    835             elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
    836             elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    837         return elided
    838 
    839 
    840 def close_expression(elided, position):
    841     """If input points to ( or { or [, finds the position that closes it.
    842 
    843     If elided[position.row][position.column] points to a '(' or '{' or '[',
    844     finds the line_number/pos that correspond to the closing of the expression.
    845 
    846      Args:
    847        elided: A CleansedLines.elided instance containing the file.
    848        position: The position of the opening item.
    849 
    850      Returns:
    851       The Position *past* the closing brace, or Position(len(elided), -1)
    852       if we never find a close. Note we ignore strings and comments when matching.
    853     """
    854     line = elided[position.row]
    855     start_character = line[position.column]
    856     if start_character == '(':
    857         enclosing_character_regex = r'[\(\)]'
    858     elif start_character == '[':
    859         enclosing_character_regex = r'[\[\]]'
    860     elif start_character == '{':
    861         enclosing_character_regex = r'[\{\}]'
    862     else:
    863         return Position(len(elided), -1)
    864 
    865     current_column = position.column + 1
    866     line_number = position.row
    867     net_open = 1
    868     for line in elided[position.row:]:
    869         line = line[current_column:]
    870 
    871         # Search the current line for opening and closing characters.
    872         while True:
    873             next_enclosing_character = search(enclosing_character_regex, line)
    874             # No more on this line.
    875             if not next_enclosing_character:
    876                 break
    877             current_column += next_enclosing_character.end(0)
    878             line = line[next_enclosing_character.end(0):]
    879             if next_enclosing_character.group(0) == start_character:
    880                 net_open += 1
    881             else:
    882                 net_open -= 1
    883                 if not net_open:
    884                     return Position(line_number, current_column)
    885 
    886         # Proceed to the next line.
    887         line_number += 1
    888         current_column = 0
    889 
    890     # The given item was not closed.
    891     return Position(len(elided), -1)
    892 
    893 def check_for_copyright(lines, error):
    894     """Logs an error if no Copyright message appears at the top of the file."""
    895 
    896     # We'll say it should occur by line 10. Don't forget there's a
    897     # dummy line at the front.
    898     for line in xrange(1, min(len(lines), 11)):
    899         if re.search(r'Copyright', lines[line], re.I):
    900             break
    901     else:                       # means no copyright line was found
    902         error(0, 'legal/copyright', 5,
    903               'No copyright message found.  '
    904               'You should have a line: "Copyright [year] <Copyright Owner>"')
    905 
    906 
    907 def get_header_guard_cpp_variable(filename):
    908     """Returns the CPP variable that should be used as a header guard.
    909 
    910     Args:
    911       filename: The name of a C++ header file.
    912 
    913     Returns:
    914       The CPP variable that should be used as a header guard in the
    915       named file.
    916 
    917     """
    918 
    919     # Restores original filename in case that style checker is invoked from Emacs's
    920     # flymake.
    921     filename = re.sub(r'_flymake\.h$', '.h', filename)
    922 
    923     standard_name = sub(r'[-.\s]', '_', os.path.basename(filename))
    924 
    925     # Files under WTF typically have header guards that start with WTF_.
    926     if filename.find('/wtf/'):
    927         special_name = "WTF_" + standard_name
    928     else:
    929         special_name = standard_name
    930     return (special_name, standard_name)
    931 
    932 
    933 def check_for_header_guard(filename, lines, error):
    934     """Checks that the file contains a header guard.
    935 
    936     Logs an error if no #ifndef header guard is present.  For other
    937     headers, checks that the full pathname is used.
    938 
    939     Args:
    940       filename: The name of the C++ header file.
    941       lines: An array of strings, each representing a line of the file.
    942       error: The function to call with any errors found.
    943     """
    944 
    945     cppvar = get_header_guard_cpp_variable(filename)
    946 
    947     ifndef = None
    948     ifndef_line_number = 0
    949     define = None
    950     for line_number, line in enumerate(lines):
    951         line_split = line.split()
    952         if len(line_split) >= 2:
    953             # find the first occurrence of #ifndef and #define, save arg
    954             if not ifndef and line_split[0] == '#ifndef':
    955                 # set ifndef to the header guard presented on the #ifndef line.
    956                 ifndef = line_split[1]
    957                 ifndef_line_number = line_number
    958             if not define and line_split[0] == '#define':
    959                 define = line_split[1]
    960             if define and ifndef:
    961                 break
    962 
    963     if not ifndef or not define or ifndef != define:
    964         error(0, 'build/header_guard', 5,
    965               'No #ifndef header guard found, suggested CPP variable is: %s' %
    966               cppvar[0])
    967         return
    968 
    969     # The guard should be File_h.
    970     if ifndef not in cppvar:
    971         error(ifndef_line_number, 'build/header_guard', 5,
    972               '#ifndef header guard has wrong style, please use: %s' % cppvar[0])
    973 
    974 
    975 def check_for_unicode_replacement_characters(lines, error):
    976     """Logs an error for each line containing Unicode replacement characters.
    977 
    978     These indicate that either the file contained invalid UTF-8 (likely)
    979     or Unicode replacement characters (which it shouldn't).  Note that
    980     it's possible for this to throw off line numbering if the invalid
    981     UTF-8 occurred adjacent to a newline.
    982 
    983     Args:
    984       lines: An array of strings, each representing a line of the file.
    985       error: The function to call with any errors found.
    986     """
    987     for line_number, line in enumerate(lines):
    988         if u'\ufffd' in line:
    989             error(line_number, 'readability/utf8', 5,
    990                   'Line contains invalid UTF-8 (or Unicode replacement character).')
    991 
    992 
    993 def check_for_new_line_at_eof(lines, error):
    994     """Logs an error if there is no newline char at the end of the file.
    995 
    996     Args:
    997       lines: An array of strings, each representing a line of the file.
    998       error: The function to call with any errors found.
    999     """
   1000 
   1001     # The array lines() was created by adding two newlines to the
   1002     # original file (go figure), then splitting on \n.
   1003     # To verify that the file ends in \n, we just have to make sure the
   1004     # last-but-two element of lines() exists and is empty.
   1005     if len(lines) < 3 or lines[-2]:
   1006         error(len(lines) - 2, 'whitespace/ending_newline', 5,
   1007               'Could not find a newline character at the end of the file.')
   1008 
   1009 
   1010 def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
   1011     """Logs an error if we see /* ... */ or "..." that extend past one line.
   1012 
   1013     /* ... */ comments are legit inside macros, for one line.
   1014     Otherwise, we prefer // comments, so it's ok to warn about the
   1015     other.  Likewise, it's ok for strings to extend across multiple
   1016     lines, as long as a line continuation character (backslash)
   1017     terminates each line. Although not currently prohibited by the C++
   1018     style guide, it's ugly and unnecessary. We don't do well with either
   1019     in this lint program, so we warn about both.
   1020 
   1021     Args:
   1022       clean_lines: A CleansedLines instance containing the file.
   1023       line_number: The number of the line to check.
   1024       error: The function to call with any errors found.
   1025     """
   1026     line = clean_lines.elided[line_number]
   1027 
   1028     # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1029     # second (escaped) slash may trigger later \" detection erroneously.
   1030     line = line.replace('\\\\', '')
   1031 
   1032     if line.count('/*') > line.count('*/'):
   1033         error(line_number, 'readability/multiline_comment', 5,
   1034               'Complex multi-line /*...*/-style comment found. '
   1035               'Lint may give bogus warnings.  '
   1036               'Consider replacing these with //-style comments, '
   1037               'with #if 0...#endif, '
   1038               'or with more clearly structured multi-line comments.')
   1039 
   1040     if (line.count('"') - line.count('\\"')) % 2:
   1041         error(line_number, 'readability/multiline_string', 5,
   1042               'Multi-line string ("...") found.  This lint script doesn\'t '
   1043               'do well with such strings, and may give bogus warnings.  They\'re '
   1044               'ugly and unnecessary, and you should use concatenation instead".')
   1045 
   1046 
   1047 _THREADING_LIST = (
   1048     ('asctime(', 'asctime_r('),
   1049     ('ctime(', 'ctime_r('),
   1050     ('getgrgid(', 'getgrgid_r('),
   1051     ('getgrnam(', 'getgrnam_r('),
   1052     ('getlogin(', 'getlogin_r('),
   1053     ('getpwnam(', 'getpwnam_r('),
   1054     ('getpwuid(', 'getpwuid_r('),
   1055     ('gmtime(', 'gmtime_r('),
   1056     ('localtime(', 'localtime_r('),
   1057     ('rand(', 'rand_r('),
   1058     ('readdir(', 'readdir_r('),
   1059     ('strtok(', 'strtok_r('),
   1060     ('ttyname(', 'ttyname_r('),
   1061     )
   1062 
   1063 
   1064 def check_posix_threading(clean_lines, line_number, error):
   1065     """Checks for calls to thread-unsafe functions.
   1066 
   1067     Much code has been originally written without consideration of
   1068     multi-threading. Also, engineers are relying on their old experience;
   1069     they have learned posix before threading extensions were added. These
   1070     tests guide the engineers to use thread-safe functions (when using
   1071     posix directly).
   1072 
   1073     Args:
   1074       clean_lines: A CleansedLines instance containing the file.
   1075       line_number: The number of the line to check.
   1076       error: The function to call with any errors found.
   1077     """
   1078     line = clean_lines.elided[line_number]
   1079     for single_thread_function, multithread_safe_function in _THREADING_LIST:
   1080         index = line.find(single_thread_function)
   1081         # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   1082         if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
   1083                                           and line[index - 1] not in ('_', '.', '>'))):
   1084             error(line_number, 'runtime/threadsafe_fn', 2,
   1085                   'Consider using ' + multithread_safe_function +
   1086                   '...) instead of ' + single_thread_function +
   1087                   '...) for improved thread safety.')
   1088 
   1089 
   1090 # Matches invalid increment: *count++, which moves pointer instead of
   1091 # incrementing a value.
   1092 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   1093     r'^\s*\*\w+(\+\+|--);')
   1094 
   1095 
   1096 def check_invalid_increment(clean_lines, line_number, error):
   1097     """Checks for invalid increment *count++.
   1098 
   1099     For example following function:
   1100     void increment_counter(int* count) {
   1101         *count++;
   1102     }
   1103     is invalid, because it effectively does count++, moving pointer, and should
   1104     be replaced with ++*count, (*count)++ or *count += 1.
   1105 
   1106     Args:
   1107       clean_lines: A CleansedLines instance containing the file.
   1108       line_number: The number of the line to check.
   1109       error: The function to call with any errors found.
   1110     """
   1111     line = clean_lines.elided[line_number]
   1112     if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1113         error(line_number, 'runtime/invalid_increment', 5,
   1114               'Changing pointer instead of value (or unused value of operator*).')
   1115 
   1116 
   1117 class _ClassInfo(object):
   1118     """Stores information about a class."""
   1119 
   1120     def __init__(self, name, line_number):
   1121         self.name = name
   1122         self.line_number = line_number
   1123         self.seen_open_brace = False
   1124         self.is_derived = False
   1125         self.virtual_method_line_number = None
   1126         self.has_virtual_destructor = False
   1127         self.brace_depth = 0
   1128 
   1129 
   1130 class _ClassState(object):
   1131     """Holds the current state of the parse relating to class declarations.
   1132 
   1133     It maintains a stack of _ClassInfos representing the parser's guess
   1134     as to the current nesting of class declarations. The innermost class
   1135     is at the top (back) of the stack. Typically, the stack will either
   1136     be empty or have exactly one entry.
   1137     """
   1138 
   1139     def __init__(self):
   1140         self.classinfo_stack = []
   1141 
   1142     def check_finished(self, error):
   1143         """Checks that all classes have been completely parsed.
   1144 
   1145         Call this when all lines in a file have been processed.
   1146         Args:
   1147           error: The function to call with any errors found.
   1148         """
   1149         if self.classinfo_stack:
   1150             # Note: This test can result in false positives if #ifdef constructs
   1151             # get in the way of brace matching. See the testBuildClass test in
   1152             # cpp_style_unittest.py for an example of this.
   1153             error(self.classinfo_stack[0].line_number, 'build/class', 5,
   1154                   'Failed to find complete declaration of class %s' %
   1155                   self.classinfo_stack[0].name)
   1156 
   1157 
   1158 class _FileState(object):
   1159     def __init__(self, clean_lines, file_extension):
   1160         self._did_inside_namespace_indent_warning = False
   1161         self._clean_lines = clean_lines
   1162         if file_extension in ['m', 'mm']:
   1163             self._is_objective_c = True
   1164         elif file_extension == 'h':
   1165             # In the case of header files, it is unknown if the file
   1166             # is objective c or not, so set this value to None and then
   1167             # if it is requested, use heuristics to guess the value.
   1168             self._is_objective_c = None
   1169         else:
   1170             self._is_objective_c = False
   1171         self._is_c = file_extension == 'c'
   1172 
   1173     def set_did_inside_namespace_indent_warning(self):
   1174         self._did_inside_namespace_indent_warning = True
   1175 
   1176     def did_inside_namespace_indent_warning(self):
   1177         return self._did_inside_namespace_indent_warning
   1178 
   1179     def is_objective_c(self):
   1180         if self._is_objective_c is None:
   1181             for line in self._clean_lines.elided:
   1182                 # Starting with @ or #import seem like the best indications
   1183                 # that we have an Objective C file.
   1184                 if line.startswith("@") or line.startswith("#import"):
   1185                     self._is_objective_c = True
   1186                     break
   1187             else:
   1188                 self._is_objective_c = False
   1189         return self._is_objective_c
   1190 
   1191     def is_c_or_objective_c(self):
   1192         """Return whether the file extension corresponds to C or Objective-C."""
   1193         return self._is_c or self.is_objective_c()
   1194 
   1195 
def check_for_non_standard_constructs(clean_lines, line_number,
                                      class_state, error):
    """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

    Complain about several constructs which gcc-2 accepts, but which are
    not standard C++.  Warning about these in lint is one way to ease the
    transition to new compilers.
    - put storage class first (e.g. "static const" instead of "const static").
    - "%lld" instead of "%qd" in printf-type functions.
    - "%1$d" is non-standard in printf-type functions.
    - "\\%" is an undefined character escape sequence.
    - text after #endif is not allowed.
    - invalid inner-style forward declaration.
    - >? and <? operators, and their >?= and <?= cousins.
    - classes with virtual methods need virtual destructors (compiler warning
        available, but not turned on yet.)

    Additionally, check for constructor/destructor style violations as it
    is very convenient to do so while checking for gcc-2 compliance.

    This function is stateful across calls: it pushes and pops _ClassInfo
    entries on class_state.classinfo_stack and updates the top entry as
    lines of a class declaration are seen.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported, which takes parameters:
             line number, category string, numeric level, and message
    """
    # NOTE(review): search() and match() are helpers defined elsewhere in
    # this file; presumably they behave like re.search/re.match - confirm.

    # Remove comments from the line, but leave in strings for now.
    line = clean_lines.lines[line_number]

    if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
        error(line_number, 'runtime/printf_format', 3,
              '%q in format strings is deprecated.  Use %ll instead.')

    if search(r'printf\s*\(.*".*%\d+\$', line):
        error(line_number, 'runtime/printf_format', 2,
              '%N$ formats are unconventional.  Try rewriting to avoid them.')

    # Remove escaped backslashes before looking for undefined escapes.
    line = line.replace('\\\\', '')

    if search(r'("|\').*\\(%|\[|\(|{)', line):
        error(line_number, 'build/printf_format', 3,
              '%, [, (, and { are undefined character escapes.  Unescape them.')

    # For the rest, work with both comments and strings removed.
    line = clean_lines.elided[line_number]

    if search(r'\b(const|volatile|void|char|short|int|long'
              r'|float|double|signed|unsigned'
              r'|schar|u?int8|u?int16|u?int32|u?int64)'
              r'\s+(auto|register|static|extern|typedef)\b',
              line):
        error(line_number, 'build/storage_class', 5,
              'Storage class (static, extern, typedef, etc) should be first.')

    if match(r'\s*#\s*endif\s*[^/\s]+', line):
        error(line_number, 'build/endif_comment', 5,
              'Uncommented text after #endif is non-standard.  Use a comment.')

    if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
        error(line_number, 'build/forward_decl', 5,
              'Inner-style forward declarations are invalid.  Remove this line.')

    if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
        error(line_number, 'build/deprecated', 3,
              '>? and <? (max and min) operators are non-standard and deprecated.')

    # Track class entry and exit, and attempt to find cases within the
    # class declaration that don't meet the C++ style
    # guidelines. Tracking is very dependent on the code matching Google
    # style guidelines, but it seems to perform well enough in testing
    # to be a worthwhile addition to the checks.
    classinfo_stack = class_state.classinfo_stack
    # Look for a class declaration. Group 3 is the (possibly qualified)
    # class or struct name.
    class_decl_match = match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
    if class_decl_match:
        classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))

    # Everything else in this function uses the top of the stack if it's
    # not empty.
    if not classinfo_stack:
        return

    classinfo = classinfo_stack[-1]

    # If the opening brace hasn't been seen look for it and also
    # parent class declarations.
    if not classinfo.seen_open_brace:
        # If the line has a ';' in it, assume it's a forward declaration or
        # a single-line class declaration, which we won't process.
        if line.find(';') != -1:
            classinfo_stack.pop()
            return
        classinfo.seen_open_brace = (line.find('{') != -1)
        # Look for a bare ':' (one that is not part of a '::'), which is
        # taken to mean the class has a base class.
        if search('(^|[^:]):($|[^:])', line):
            classinfo.is_derived = True
        if not classinfo.seen_open_brace:
            return  # Everything else in this function is for after open brace

    # The class may have been declared with namespace or classname qualifiers.
    # The constructor and destructor will not have those qualifiers.
    base_classname = classinfo.name.split('::')[-1]

    # Look for single-argument constructors that aren't marked explicit.
    # Technically a valid construct, but against style.
    # The pattern requires leading whitespace before the class name and an
    # argument list without commas or nested parentheses.
    # NOTE(review): if match() anchors at the start of the line, the
    # (?<!explicit) lookbehind has nothing before it to inspect - confirm.
    args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                 % re.escape(base_classname),
                 line)
    # A 'void' argument list and copy constructors (an argument of type
    # [const] ClassName&) are not reported.
    if (args
        and args.group(1) != 'void'
        and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                      args.group(1).strip())):
        error(line_number, 'runtime/explicit', 5,
              'Single-argument constructors should be marked explicit.')

    # Look for methods declared virtual. Only the most recent line
    # containing 'virtual' is remembered.
    if search(r'\bvirtual\b', line):
        classinfo.virtual_method_line_number = line_number
        # Only look for a destructor declaration on the same line. It would
        # be extremely unlikely for the destructor declaration to occupy
        # more than one line.
        # Note that base_classname is interpolated here without re.escape,
        # unlike in the constructor check above.
        if search(r'~%s\s*\(' % base_classname, line):
            classinfo.has_virtual_destructor = True

    # Look for class end: the running brace depth drops to zero (or below)
    # once the class's closing brace has been counted.
    brace_depth = classinfo.brace_depth
    brace_depth = brace_depth + line.count('{') - line.count('}')
    if brace_depth <= 0:
        classinfo = classinfo_stack.pop()
        # Try to detect missing virtual destructor declarations.
        # For now, only warn if a non-derived class with virtual methods lacks
        # a virtual destructor. This is to make it less likely that people will
        # declare derived virtual destructors without declaring the base
        # destructor virtual.
        if ((classinfo.virtual_method_line_number is not None)
            and (not classinfo.has_virtual_destructor)
            and (not classinfo.is_derived)):  # Only warn for base classes
            error(classinfo.line_number, 'runtime/virtual', 4,
                  'The class %s probably needs a virtual destructor due to '
                  'having virtual method(s), one declared at line %d.'
                  % (classinfo.name, classinfo.virtual_method_line_number))
    else:
        # Still inside the class: remember the depth for the next line.
        classinfo.brace_depth = brace_depth
   1344 
   1345 
   1346 def check_spacing_for_function_call(line, line_number, error):
   1347     """Checks for the correctness of various spacing around function calls.
   1348 
   1349     Args:
   1350       line: The text of the line to check.
   1351       line_number: The number of the line to check.
   1352       error: The function to call with any errors found.
   1353     """
   1354 
   1355     # Since function calls often occur inside if/for/foreach/while/switch
   1356     # expressions - which have their own, more liberal conventions - we
   1357     # first see if we should be looking inside such an expression for a
   1358     # function call, to which we can apply more strict standards.
   1359     function_call = line    # if there's no control flow construct, look at whole line
   1360     for pattern in (r'\bif\s*\((.*)\)\s*{',
   1361                     r'\bfor\s*\((.*)\)\s*{',
   1362                     r'\bforeach\s*\((.*)\)\s*{',
   1363                     r'\bwhile\s*\((.*)\)\s*[{;]',
   1364                     r'\bswitch\s*\((.*)\)\s*{'):
   1365         matched = search(pattern, line)
   1366         if matched:
   1367             function_call = matched.group(1)    # look inside the parens for function calls
   1368             break
   1369 
   1370     # Except in if/for/foreach/while/switch, there should never be space
   1371     # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1372     # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1373     # a space before a ( when it's a function argument.  I assume it's a
   1374     # function argument when the char before the whitespace is legal in
   1375     # a function name (alnum + _) and we're not starting a macro. Also ignore
   1376     # pointers and references to arrays and functions coz they're too tricky:
   1377     # we use a very simple way to recognize these:
   1378     # " (something)(maybe-something)" or
   1379     # " (something)(maybe-something," or
   1380     # " (something)[something]"
   1381     # Note that we assume the contents of [] to be short enough that
   1382     # they'll never need to wrap.
   1383     if (  # Ignore control structures.
   1384         not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
   1385         # Ignore pointers/references to functions.
   1386         and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
   1387         # Ignore pointers/references to arrays.
   1388         and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
   1389         if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):      # a ( used for a fn call
   1390             error(line_number, 'whitespace/parens', 4,
   1391                   'Extra space after ( in function call')
   1392         elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
   1393             error(line_number, 'whitespace/parens', 2,
   1394                   'Extra space after (')
   1395         if (search(r'\w\s+\(', function_call)
   1396             and not match(r'\s*(#|typedef)', function_call)):
   1397             error(line_number, 'whitespace/parens', 4,
   1398                   'Extra space before ( in function call')
   1399         # If the ) is followed only by a newline or a { + newline, assume it's
   1400         # part of a control statement (if/while/etc), and don't complain
   1401         if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
   1402             error(line_number, 'whitespace/parens', 2,
   1403                   'Extra space before )')
   1404 
   1405 
   1406 def is_blank_line(line):
   1407     """Returns true if the given line is blank.
   1408 
   1409     We consider a line to be blank if the line is empty or consists of
   1410     only white spaces.
   1411 
   1412     Args:
   1413       line: A line of a string.
   1414 
   1415     Returns:
   1416       True, if the given line is blank.
   1417     """
   1418     return not line or line.isspace()
   1419 
   1420 
   1421 def detect_functions(clean_lines, line_number, function_state, error):
   1422     """Finds where functions start and end.
   1423 
   1424     Uses a simplistic algorithm assuming other style guidelines
   1425     (especially spacing) are followed.
   1426     Trivial bodies are unchecked, so constructors with huge initializer lists
   1427     may be missed.
   1428 
   1429     Args:
   1430       clean_lines: A CleansedLines instance containing the file.
   1431       line_number: The number of the line to check.
   1432       function_state: Current function name and lines in body so far.
   1433       error: The function to call with any errors found.
   1434     """
   1435     # Are we now past the end of a function?
   1436     if function_state.end_position.row + 1 == line_number:
   1437         function_state.end()
   1438 
   1439     # If we're in a function, don't try to detect a new one.
   1440     if function_state.in_a_function:
   1441         return
   1442 
   1443     lines = clean_lines.lines
   1444     line = lines[line_number]
   1445     raw = clean_lines.raw_lines
   1446     raw_line = raw[line_number]
   1447 
   1448     # Lines ending with a \ indicate a macro. Don't try to check them.
   1449     if raw_line.endswith('\\'):
   1450         return
   1451 
   1452     regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\('  # decls * & space::name( ...
   1453     match_result = match(regexp, line)
   1454     if not match_result:
   1455         return
   1456 
   1457     # If the name is all caps and underscores, figure it's a macro and
   1458     # ignore it, unless it's TEST or TEST_F.
   1459     function_name = match_result.group(1).split()[-1]
   1460     if function_name != 'TEST' and function_name != 'TEST_F' and match(r'[A-Z_]+$', function_name):
   1461         return
   1462 
   1463     joined_line = ''
   1464     for start_line_number in xrange(line_number, clean_lines.num_lines()):
   1465         start_line = clean_lines.elided[start_line_number]
   1466         joined_line += ' ' + start_line.lstrip()
   1467         body_match = search(r'{|;', start_line)
   1468         if body_match:
   1469             body_start_position = Position(start_line_number, body_match.start(0))
   1470 
   1471             # Replace template constructs with _ so that no spaces remain in the function name,
   1472             # while keeping the column numbers of other characters the same as "line".
   1473             line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line)
   1474             match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates)
   1475             if not match_function:
   1476                 return  # The '(' must have been inside of a template.
   1477 
   1478             # Use the column numbers from the modified line to find the
   1479             # function name in the original line.
   1480             function = line[match_function.start(1):match_function.end(1)]
   1481             function_name_start_position = Position(line_number, match_function.start(1))
   1482 
   1483             if match(r'TEST', function):    # Handle TEST... macros
   1484                 parameter_regexp = search(r'(\(.*\))', joined_line)
   1485                 if parameter_regexp:             # Ignore bad syntax
   1486                     function += parameter_regexp.group(1)
   1487             else:
   1488                 function += '()'
   1489 
   1490             parameter_start_position = Position(line_number, match_function.end(1))
   1491             parameter_end_position = close_expression(clean_lines.elided, parameter_start_position)
   1492             if parameter_end_position.row == len(clean_lines.elided):
   1493                 # No end was found.
   1494                 return
   1495 
   1496             if start_line[body_start_position.column] == ';':
   1497                 end_position = Position(body_start_position.row, body_start_position.column + 1)
   1498             else:
   1499                 end_position = close_expression(clean_lines.elided, body_start_position)
   1500 
   1501             # Check for nonsensical positions. (This happens in test cases which check code snippets.)
   1502             if parameter_end_position > body_start_position:
   1503                 return
   1504 
   1505             function_state.begin(function, function_name_start_position, body_start_position, end_position,
   1506                                  parameter_start_position, parameter_end_position, clean_lines)
   1507             return
   1508 
   1509     # No body for the function (or evidence of a non-function) was found.
   1510     error(line_number, 'readability/fn_size', 5,
   1511           'Lint failed to find start of function body.')
   1512 
   1513 
   1514 def check_for_function_lengths(clean_lines, line_number, function_state, error):
   1515     """Reports for long function bodies.
   1516 
   1517     For an overview why this is done, see:
   1518     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1519 
   1520     Blank/comment lines are not counted so as to avoid encouraging the removal
   1521     of vertical space and commments just to get through a lint check.
   1522     NOLINT *on the last line of a function* disables this check.
   1523 
   1524     Args:
   1525       clean_lines: A CleansedLines instance containing the file.
   1526       line_number: The number of the line to check.
   1527       function_state: Current function name and lines in body so far.
   1528       error: The function to call with any errors found.
   1529     """
   1530     lines = clean_lines.lines
   1531     line = lines[line_number]
   1532     raw = clean_lines.raw_lines
   1533     raw_line = raw[line_number]
   1534 
   1535     if function_state.end_position.row == line_number:  # last line
   1536         if not search(r'\bNOLINT\b', raw_line):
   1537             function_state.check(error, line_number)
   1538     elif not match(r'^\s*$', line):
   1539         function_state.count(line_number)  # Count non-blank/non-comment lines.
   1540 
   1541 
   1542 def _check_parameter_name_against_text(parameter, text, error):
   1543     """Checks to see if the parameter name is contained within the text.
   1544 
   1545     Return false if the check failed (i.e. an error was produced).
   1546     """
   1547 
   1548     # Treat 'lower with underscores' as a canonical form because it is
   1549     # case insensitive while still retaining word breaks. (This ensures that
   1550     # 'elate' doesn't look like it is duplicating of 'NateLate'.)
   1551     canonical_parameter_name = parameter.lower_with_underscores_name()
   1552 
   1553     # Appends "object" to all text to catch variables that did the same (but only
   1554     # do this when the parameter name is more than a single character to avoid
   1555     # flagging 'b' which may be an ok variable when used in an rgba function).
   1556     if len(canonical_parameter_name) > 1:
   1557         text = sub(r'(\w)\b', r'\1Object', text)
   1558     canonical_text = _convert_to_lower_with_underscores(text)
   1559 
   1560     # Used to detect cases like ec for ExceptionCode.
   1561     acronym = _create_acronym(text).lower()
   1562     if canonical_text.find(canonical_parameter_name) != -1 or acronym.find(canonical_parameter_name) != -1:
   1563         error(parameter.row, 'readability/parameter_name', 5,
   1564               'The parameter name "%s" adds no information, so it should be removed.' % parameter.name)
   1565         return False
   1566     return True
   1567 
   1568 
   1569 def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error):
   1570     """Check that function definitions for style issues.
   1571 
   1572     Specifically, check that parameter names in declarations add information.
   1573 
   1574     Args:
   1575        filename: Filename of the file that is being processed.
   1576        file_extension: The current file extension, without the leading dot.
   1577        clean_lines: A CleansedLines instance containing the file.
   1578        line_number: The number of the line to check.
   1579        function_state: Current function name and lines in body so far.
   1580        error: The function to call with any errors found.
   1581     """
   1582     if line_number != function_state.body_start_position.row:
   1583         return
   1584 
   1585     modifiers_and_return_type = function_state.modifiers_and_return_type()
   1586     if filename.find('/chromium/') != -1 and search(r'\bWEBKIT_API\b', modifiers_and_return_type):
   1587         if filename.find('/chromium/public/') == -1:
   1588             error(function_state.function_name_start_position.row, 'readability/webkit_api', 5,
   1589                   'WEBKIT_API should only appear in the chromium public directory.')
   1590         elif not file_extension == "h":
   1591             error(function_state.function_name_start_position.row, 'readability/webkit_api', 5,
   1592                   'WEBKIT_API should only be used in header files.')
   1593         elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type):
   1594             error(function_state.function_name_start_position.row, 'readability/webkit_api', 5,
   1595                   'WEBKIT_API should not be used on a function with a body.')
   1596         elif function_state.is_pure:
   1597             error(function_state.function_name_start_position.row, 'readability/webkit_api', 5,
   1598                   'WEBKIT_API should not be used with a pure virtual function.')
   1599 
   1600     # Do checks specific to function declaractions.
   1601     if not function_state.is_declaration:
   1602         return
   1603     parameter_list = function_state.parameter_list()
   1604     for parameter in parameter_list:
   1605         if not parameter.name:
   1606             continue
   1607 
   1608         # Check the parameter name against the function name for single parameter set functions.
   1609         if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function):
   1610             trimmed_function_name = function_state.current_function[len('set'):]
   1611             if not _check_parameter_name_against_text(parameter, trimmed_function_name, error):
   1612                 continue  # Since an error was noted for this name, move to the next parameter.
   1613 
   1614         # Check the parameter name against the type.
   1615         if not _check_parameter_name_against_text(parameter, parameter.type, error):
   1616             continue  # Since an error was noted for this name, move to the next parameter.
   1617 
   1618 
   1619 def check_pass_ptr_usage(clean_lines, line_number, function_state, error):
   1620     """Check for proper usage of Pass*Ptr.
   1621 
   1622     Currently this is limited to detecting declarations of Pass*Ptr
   1623     variables inside of functions.
   1624 
   1625     Args:
   1626       clean_lines: A CleansedLines instance containing the file.
   1627       line_number: The number of the line to check.
   1628       function_state: Current function name and lines in body so far.
   1629       error: The function to call with any errors found.
   1630     """
   1631     if not function_state.in_a_function:
   1632         return
   1633 
   1634     lines = clean_lines.lines
   1635     line = lines[line_number]
   1636     if line_number > function_state.body_start_position.row:
   1637         matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line)
   1638         if matched_pass_ptr:
   1639             type_name = 'Pass%sPtr' % matched_pass_ptr.group(1)
   1640             error(line_number, 'readability/pass_ptr', 5,
   1641                   'Local variables should never be %s (see '
   1642                   'http://webkit.org/coding/RefPtr.html).' % type_name)
   1643 
   1644 
   1645 def check_spacing(file_extension, clean_lines, line_number, error):
   1646     """Checks for the correctness of various spacing issues in the code.
   1647 
   1648     Things we check for: spaces around operators, spaces after
   1649     if/for/while/switch, no spaces around parens in function calls, two
   1650     spaces between code and comment, don't start a block with a blank
   1651     line, don't end a function with a blank line, don't have too many
   1652     blank lines in a row.
   1653 
   1654     Args:
   1655       file_extension: The current file extension, without the leading dot.
   1656       clean_lines: A CleansedLines instance containing the file.
   1657       line_number: The number of the line to check.
   1658       error: The function to call with any errors found.
   1659     """
   1660 
   1661     raw = clean_lines.raw_lines
   1662     line = raw[line_number]
   1663 
   1664     # Before nixing comments, check if the line is blank for no good
   1665     # reason.  This includes the first line after a block is opened, and
   1666     # blank lines at the end of a function (ie, right before a line like '}').
   1667     if is_blank_line(line):
   1668         elided = clean_lines.elided
   1669         previous_line = elided[line_number - 1]
   1670         previous_brace = previous_line.rfind('{')
   1671         # FIXME: Don't complain if line before blank line, and line after,
   1672         #        both start with alnums and are indented the same amount.
   1673         #        This ignores whitespace at the start of a namespace block
   1674         #        because those are not usually indented.
   1675         if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
   1676             and previous_line[:previous_brace].find('namespace') == -1):
   1677             # OK, we have a blank line at the start of a code block.  Before we
   1678             # complain, we check if it is an exception to the rule: The previous
   1679             # non-empty line has the parameters of a function header that are indented
   1680             # 4 spaces (because they did not fit in a 80 column line when placed on
   1681             # the same line as the function name).  We also check for the case where
   1682             # the previous line is indented 6 spaces, which may happen when the
   1683             # initializers of a constructor do not fit into a 80 column line.
   1684             exception = False
   1685             if match(r' {6}\w', previous_line):  # Initializer list?
   1686                 # We are looking for the opening column of initializer list, which
   1687                 # should be indented 4 spaces to cause 6 space indentation afterwards.
   1688                 search_position = line_number - 2
   1689                 while (search_position >= 0
   1690                        and match(r' {6}\w', elided[search_position])):
   1691                     search_position -= 1
   1692                 exception = (search_position >= 0
   1693                              and elided[search_position][:5] == '    :')
   1694             else:
   1695                 # Search for the function arguments or an initializer list.  We use a
   1696                 # simple heuristic here: If the line is indented 4 spaces; and we have a
   1697                 # closing paren, without the opening paren, followed by an opening brace
   1698                 # or colon (for initializer lists) we assume that it is the last line of
   1699                 # a function header.  If we have a colon indented 4 spaces, it is an
   1700                 # initializer list.
   1701                 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   1702                                    previous_line)
   1703                              or match(r' {4}:', previous_line))
   1704 
   1705             if not exception:
   1706                 error(line_number, 'whitespace/blank_line', 2,
   1707                       'Blank line at the start of a code block.  Is this needed?')
   1708         # This doesn't ignore whitespace at the end of a namespace block
   1709         # because that is too hard without pairing open/close braces;
   1710         # however, a special exception is made for namespace closing
   1711         # brackets which have a comment containing "namespace".
   1712         #
   1713         # Also, ignore blank lines at the end of a block in a long if-else
   1714         # chain, like this:
   1715         #   if (condition1) {
   1716         #     // Something followed by a blank line
   1717         #
   1718         #   } else if (condition2) {
   1719         #     // Something else
   1720         #   }
   1721         if line_number + 1 < clean_lines.num_lines():
   1722             next_line = raw[line_number + 1]
   1723             if (next_line
   1724                 and match(r'\s*}', next_line)
   1725                 and next_line.find('namespace') == -1
   1726                 and next_line.find('} else ') == -1):
   1727                 error(line_number, 'whitespace/blank_line', 3,
   1728                       'Blank line at the end of a code block.  Is this needed?')
   1729 
   1730     # Next, we check for proper spacing with respect to comments.
   1731     comment_position = line.find('//')
   1732     if comment_position != -1:
   1733         # Check if the // may be in quotes.  If so, ignore it
   1734         # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   1735         if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
   1736             # Allow one space before end of line comment.
   1737             if (not match(r'^\s*$', line[:comment_position])
   1738                 and (comment_position >= 1
   1739                 and ((line[comment_position - 1] not in string.whitespace)
   1740                      or (comment_position >= 2
   1741                          and line[comment_position - 2] in string.whitespace)))):
   1742                 error(line_number, 'whitespace/comments', 5,
   1743                       'One space before end of line comments')
   1744             # There should always be a space between the // and the comment
   1745             commentend = comment_position + 2
   1746             if commentend < len(line) and not line[commentend] == ' ':
   1747                 # but some lines are exceptions -- e.g. if they're big
   1748                 # comment delimiters like:
   1749                 # //----------------------------------------------------------
   1750                 # or they begin with multiple slashes followed by a space:
   1751                 # //////// Header comment
   1752                 matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
   1753                            or search(r'^/+ ', line[commentend:]))
   1754                 if not matched:
   1755                     error(line_number, 'whitespace/comments', 4,
   1756                           'Should have a space between // and comment')
   1757 
   1758             # There should only be one space after punctuation in a comment.
   1759             if search('[.!?,;:]\s\s', line[comment_position:]):
   1760                 error(line_number, 'whitespace/comments', 5,
   1761                       'Should only a single space after a punctuation in a comment.')
   1762 
   1763     line = clean_lines.elided[line_number]  # get rid of comments and strings
   1764 
   1765     # Don't try to do spacing checks for operator methods
   1766     line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=)\(', 'operator\(', line)
   1767     # Don't try to do spacing checks for #include or #import statements at
   1768     # minimum because it messes up checks for spacing around /
   1769     if match(r'\s*#\s*(?:include|import)', line):
   1770         return
   1771     if search(r'[\w.]=[\w.]', line):
   1772         error(line_number, 'whitespace/operators', 4,
   1773               'Missing spaces around =')
   1774 
   1775     # FIXME: It's not ok to have spaces around binary operators like .
   1776 
   1777     # You should always have whitespace around binary operators.
   1778     # Alas, we can't test < or > because they're legitimately used sans spaces
   1779     # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
   1780     # only if it's not template params list spilling into the next line.
   1781     matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
   1782     if not matched:
   1783         # Note that while it seems that the '<[^<]*' term in the following
   1784         # regexp could be simplified to '<.*', which would indeed match
   1785         # the same class of strings, the [^<] means that searching for the
   1786         # regexp takes linear rather than quadratic time.
   1787         if not search(r'<[^<]*,\s*$', line):  # template params spill
   1788             matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
   1789     if matched:
   1790         error(line_number, 'whitespace/operators', 3,
   1791               'Missing spaces around %s' % matched.group(1))
   1792 
   1793     # There shouldn't be space around unary operators
   1794     matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   1795     if matched:
   1796         error(line_number, 'whitespace/operators', 4,
   1797               'Extra space for operator %s' % matched.group(1))
   1798 
   1799     # A pet peeve of mine: no spaces after an if, while, switch, or for
   1800     matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
   1801     if matched:
   1802         error(line_number, 'whitespace/parens', 5,
   1803               'Missing space before ( in %s' % matched.group(1))
   1804 
   1805     # For if/for/foreach/while/switch, the left and right parens should be
   1806     # consistent about how many spaces are inside the parens, and
   1807     # there should either be zero or one spaces inside the parens.
   1808     # We don't want: "if ( foo)" or "if ( foo   )".
   1809     # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   1810     matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
   1811     if matched:
   1812         statement = matched.group('statement')
   1813         condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
   1814         if condition is not None:
   1815             condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
   1816             if condition_match:
   1817                 n_leading = len(condition_match.group('leading'))
   1818                 n_trailing = len(condition_match.group('trailing'))
   1819                 if n_leading != 0:
   1820                     for_exception = statement == 'for' and condition.startswith(' ;')
   1821                     if not for_exception:
   1822                         error(line_number, 'whitespace/parens', 5,
   1823                               'Extra space after ( in %s' % statement)
   1824                 if n_trailing != 0:
   1825                     for_exception = statement == 'for' and condition.endswith('; ')
   1826                     if not for_exception:
   1827                         error(line_number, 'whitespace/parens', 5,
   1828                               'Extra space before ) in %s' % statement)
   1829 
   1830             # Do not check for more than one command in macros
   1831             in_preprocessor_directive = match(r'\s*#', line)
   1832             if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
   1833                 error(line_number, 'whitespace/parens', 4,
   1834                       'More than one command on the same line in %s' % statement)
   1835 
   1836     # You should always have a space after a comma (either as fn arg or operator)
   1837     if search(r',[^\s]', line):
   1838         error(line_number, 'whitespace/comma', 3,
   1839               'Missing space after ,')
   1840 
   1841     matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
   1842     if matched:
   1843         error(line_number, 'whitespace/declaration', 3,
   1844               'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))
   1845 
   1846     if file_extension == 'cpp':
   1847         # C++ should have the & or * beside the type not the variable name.
   1848         matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
   1849         if matched:
   1850             error(line_number, 'whitespace/declaration', 3,
   1851                   'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
   1852 
   1853     elif file_extension == 'c':
   1854         # C Pointer declaration should have the * beside the variable not the type name.
   1855         matched = search(r'^\s*\w+\*\s+\w+', line)
   1856         if matched:
   1857             error(line_number, 'whitespace/declaration', 3,
   1858                   'Declaration has space between * and variable name in %s' % matched.group(0).strip())
   1859 
   1860     # Next we will look for issues with function calls.
   1861     check_spacing_for_function_call(line, line_number, error)
   1862 
   1863     # Except after an opening paren, you should have spaces before your braces.
   1864     # And since you should never have braces at the beginning of a line, this is
   1865     # an easy test.
   1866     if search(r'[^ ({]{', line):
   1867         error(line_number, 'whitespace/braces', 5,
   1868               'Missing space before {')
   1869 
   1870     # Make sure '} else {' has spaces.
   1871     if search(r'}else', line):
   1872         error(line_number, 'whitespace/braces', 5,
   1873               'Missing space before else')
   1874 
   1875     # You shouldn't have spaces before your brackets, except maybe after
   1876     # 'delete []' or 'new char * []'.
   1877     if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
   1878         error(line_number, 'whitespace/braces', 5,
   1879               'Extra space before [')
   1880 
   1881     # You shouldn't have a space before a semicolon at the end of the line.
   1882     # There's a special case for "for" since the style guide allows space before
   1883     # the semicolon there.
   1884     if search(r':\s*;\s*$', line):
   1885         error(line_number, 'whitespace/semicolon', 5,
   1886               'Semicolon defining empty statement. Use { } instead.')
   1887     elif search(r'^\s*;\s*$', line):
   1888         error(line_number, 'whitespace/semicolon', 5,
   1889               'Line contains only semicolon. If this should be an empty statement, '
   1890               'use { } instead.')
   1891     elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
   1892         error(line_number, 'whitespace/semicolon', 5,
   1893               'Extra space before last semicolon. If this should be an empty '
   1894               'statement, use { } instead.')
   1895     elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
   1896           and line.count('(') == line.count(')')
   1897           # Allow do {} while();
   1898           and not search(r'}\s*while', line)):
   1899         error(line_number, 'whitespace/semicolon', 5,
   1900               'Semicolon defining empty statement for this loop. Use { } instead.')
   1901 
   1902 
   1903 def get_previous_non_blank_line(clean_lines, line_number):
   1904     """Return the most recent non-blank line and its line number.
   1905 
   1906     Args:
   1907       clean_lines: A CleansedLines instance containing the file contents.
   1908       line_number: The number of the line to check.
   1909 
   1910     Returns:
   1911       A tuple with two elements.  The first element is the contents of the last
   1912       non-blank line before the current line, or the empty string if this is the
   1913       first non-blank line.  The second is the line number of that line, or -1
   1914       if this is the first non-blank line.
   1915     """
   1916 
   1917     previous_line_number = line_number - 1
   1918     while previous_line_number >= 0:
   1919         previous_line = clean_lines.elided[previous_line_number]
   1920         if not is_blank_line(previous_line):     # if not a blank line...
   1921             return (previous_line, previous_line_number)
   1922         previous_line_number -= 1
   1923     return ('', -1)
   1924 
   1925 
   1926 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
   1927     """Looks for indentation errors inside of namespaces.
   1928 
   1929     Args:
   1930       clean_lines: A CleansedLines instance containing the file.
   1931       line_number: The number of the line to check.
   1932       file_extension: The extension (dot not included) of the file.
   1933       file_state: A _FileState instance which maintains information about
   1934                   the state of things in the file.
   1935       error: The function to call with any errors found.
   1936     """
   1937 
   1938     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1939 
   1940     namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
   1941     if not namespace_match:
   1942         return
   1943 
   1944     current_indentation_level = len(namespace_match.group('namespace_indentation'))
   1945     if current_indentation_level > 0:
   1946         # Don't warn about an indented namespace if we already warned about indented code.
   1947         if not file_state.did_inside_namespace_indent_warning():
   1948             error(line_number, 'whitespace/indent', 4,
   1949                   'namespace should never be indented.')
   1950         return
   1951     looking_for_semicolon = False;
   1952     line_offset = 0
   1953     in_preprocessor_directive = False;
   1954     for current_line in clean_lines.elided[line_number + 1:]:
   1955         line_offset += 1
   1956         if not current_line.strip():
   1957             continue
   1958         if not current_indentation_level:
   1959             if not (in_preprocessor_directive or looking_for_semicolon):
   1960                 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
   1961                     file_state.set_did_inside_namespace_indent_warning()
   1962                     error(line_number + line_offset, 'whitespace/indent', 4,
   1963                           'Code inside a namespace should not be indented.')
   1964             if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax.
   1965                 in_preprocessor_directive = current_line[-1] == '\\'
   1966             else:
   1967                 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\')
   1968         else:
   1969             looking_for_semicolon = False; # If we have a brace we may not need a semicolon.
   1970         current_indentation_level += current_line.count('{') - current_line.count('}')
   1971         if current_indentation_level < 0:
   1972             break;
   1973 
   1974 
   1975 def check_using_std(clean_lines, line_number, file_state, error):
   1976     """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'.
   1977 
   1978     Args:
   1979       clean_lines: A CleansedLines instance containing the file.
   1980       line_number: The number of the line to check.
   1981       file_state: A _FileState instance which maintains information about
   1982                   the state of things in the file.
   1983       error: The function to call with any errors found.
   1984     """
   1985 
   1986     # This check doesn't apply to C or Objective-C implementation files.
   1987     if file_state.is_c_or_objective_c():
   1988         return
   1989 
   1990     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1991 
   1992     using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line)
   1993     if not using_std_match:
   1994         return
   1995 
   1996     method_name = using_std_match.group('method_name')
   1997     error(line_number, 'build/using_std', 4,
   1998           "Use 'using namespace std;' instead of 'using std::%s;'." % method_name)
   1999 
   2000 
   2001 def check_max_min_macros(clean_lines, line_number, file_state, error):
   2002     """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min().
   2003 
   2004     Args:
   2005       clean_lines: A CleansedLines instance containing the file.
   2006       line_number: The number of the line to check.
   2007       file_state: A _FileState instance which maintains information about
   2008                   the state of things in the file.
   2009       error: The function to call with any errors found.
   2010     """
   2011 
   2012     # This check doesn't apply to C or Objective-C implementation files.
   2013     if file_state.is_c_or_objective_c():
   2014         return
   2015 
   2016     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2017 
   2018     max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line)
   2019     if not max_min_macros_search:
   2020         return
   2021 
   2022     max_min_macro = max_min_macros_search.group('max_min_macro')
   2023     max_min_macro_lower = max_min_macro.lower()
   2024     error(line_number, 'runtime/max_min_macros', 4,
   2025           'Use std::%s() or std::%s<type>() instead of the %s() macro.'
   2026           % (max_min_macro_lower, max_min_macro_lower, max_min_macro))
   2027 
   2028 
   2029 def check_switch_indentation(clean_lines, line_number, error):
   2030     """Looks for indentation errors inside of switch statements.
   2031 
   2032     Args:
   2033       clean_lines: A CleansedLines instance containing the file.
   2034       line_number: The number of the line to check.
   2035       error: The function to call with any errors found.
   2036     """
   2037 
   2038     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2039 
   2040     switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
   2041     if not switch_match:
   2042         return
   2043 
   2044     switch_indentation = switch_match.group('switch_indentation')
   2045     inner_indentation = switch_indentation + ' ' * 4
   2046     line_offset = 0
   2047     encountered_nested_switch = False
   2048 
   2049     for current_line in clean_lines.elided[line_number + 1:]:
   2050         line_offset += 1
   2051 
   2052         # Skip not only empty lines but also those with preprocessor directives.
   2053         if current_line.strip() == '' or current_line.startswith('#'):
   2054             continue
   2055 
   2056         if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line):
   2057             # Complexity alarm - another switch statement nested inside the one
   2058             # that we're currently testing. We'll need to track the extent of
   2059             # that inner switch if the upcoming label tests are still supposed
   2060             # to work correctly. Let's not do that; instead, we'll finish
   2061             # checking this line, and then leave it like that. Assuming the
   2062             # indentation is done consistently (even if incorrectly), this will
   2063             # still catch all indentation issues in practice.
   2064             encountered_nested_switch = True
   2065 
   2066         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2067         current_indentation = current_indentation_match.group('indentation')
   2068         remaining_line = current_indentation_match.group('remaining_line')
   2069 
   2070         # End the check at the end of the switch statement.
   2071         if remaining_line.startswith('}') and current_indentation == switch_indentation:
   2072             break
   2073         # Case and default branches should not be indented. The regexp also
   2074         # catches single-line cases like "default: break;" but does not trigger
   2075         # on stuff like "Document::Foo();".
   2076         elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
   2077             if current_indentation != switch_indentation:
   2078                 error(line_number + line_offset, 'whitespace/indent', 4,
   2079                       'A case label should not be indented, but line up with its switch statement.')
   2080                 # Don't throw an error for multiple badly indented labels,
   2081                 # one should be enough to figure out the problem.
   2082                 break
   2083         # We ignore goto labels at the very beginning of a line.
   2084         elif match(r'\w+\s*:\s*$', remaining_line):
   2085             continue
   2086         # It's not a goto label, so check if it's indented at least as far as
   2087         # the switch statement plus one more level of indentation.
   2088         elif not current_indentation.startswith(inner_indentation):
   2089             error(line_number + line_offset, 'whitespace/indent', 4,
   2090                   'Non-label code inside switch statements should be indented.')
   2091             # Don't throw an error for multiple badly indented statements,
   2092             # one should be enough to figure out the problem.
   2093             break
   2094 
   2095         if encountered_nested_switch:
   2096             break
   2097 
   2098 
   2099 def check_braces(clean_lines, line_number, error):
   2100     """Looks for misplaced braces (e.g. at the end of line).
   2101 
   2102     Args:
   2103       clean_lines: A CleansedLines instance containing the file.
   2104       line_number: The number of the line to check.
   2105       error: The function to call with any errors found.
   2106     """
   2107 
   2108     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2109 
   2110     if match(r'\s*{\s*$', line):
   2111         # We allow an open brace to start a line in the case where someone
   2112         # is using braces for function definition or in a block to
   2113         # explicitly create a new scope, which is commonly used to control
   2114         # the lifetime of stack-allocated variables.  We don't detect this
   2115         # perfectly: we just don't complain if the last non-whitespace
   2116         # character on the previous non-blank line is ';', ':', '{', '}',
   2117         # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
   2118         # We also allow '#' for #endif and '=' for array initialization.
   2119         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2120         if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line)
   2121              or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
   2122             and previous_line.find('#') < 0):
   2123             error(line_number, 'whitespace/braces', 4,
   2124                   'This { should be at the end of the previous line')
   2125     elif (search(r'\)\s*(const\s*)?{\s*$', line)
   2126           and line.count('(') == line.count(')')
   2127           and not search(r'\b(if|for|foreach|while|switch)\b', line)
   2128           and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
   2129         error(line_number, 'whitespace/braces', 4,
   2130               'Place brace on its own line for function definitions.')
   2131 
   2132     if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1):
   2133         # We check if a closed brace has started a line to see if a
   2134         # one line control statement was previous.
   2135         previous_line = clean_lines.elided[line_number - 2]
   2136         last_open_brace = previous_line.rfind('{')
   2137         if (last_open_brace != -1 and previous_line.find('}', last_open_brace) == -1
   2138             and search(r'\b(if|for|foreach|while|else)\b', previous_line)):
   2139             error(line_number, 'whitespace/braces', 4,
   2140                   'One line control clauses should not use braces.')
   2141 
   2142     # An else clause should be on the same line as the preceding closing brace.
   2143     if match(r'\s*else\s*', line):
   2144         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2145         if match(r'\s*}\s*$', previous_line):
   2146             error(line_number, 'whitespace/newline', 4,
   2147                   'An else should appear on the same line as the preceding }')
   2148 
   2149     # Likewise, an else should never have the else clause on the same line
   2150     if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
   2151         error(line_number, 'whitespace/newline', 4,
   2152               'Else clause should never be on same line as else (use 2 lines)')
   2153 
   2154     # In the same way, a do/while should never be on one line
   2155     if match(r'\s*do [^\s{]', line):
   2156         error(line_number, 'whitespace/newline', 4,
   2157               'do/while clauses should not be on a single line')
   2158 
   2159     # Braces shouldn't be followed by a ; unless they're defining a struct
   2160     # or initializing an array.
   2161     # We can't tell in general, but we can for some common cases.
   2162     previous_line_number = line_number
   2163     while True:
   2164         (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
   2165         if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
   2166             line = previous_line + line
   2167         else:
   2168             break
   2169     if (search(r'{.*}\s*;', line)
   2170         and line.count('{') == line.count('}')
   2171         and not search(r'struct|class|enum|\s*=\s*{', line)):
   2172         error(line_number, 'readability/braces', 4,
   2173               "You don't need a ; after a }")
   2174 
   2175 
   2176 def check_exit_statement_simplifications(clean_lines, line_number, error):
   2177     """Looks for else or else-if statements that should be written as an
   2178     if statement when the prior if concludes with a return, break, continue or
   2179     goto statement.
   2180 
   2181     Args:
   2182       clean_lines: A CleansedLines instance containing the file.
   2183       line_number: The number of the line to check.
   2184       error: The function to call with any errors found.
   2185     """
   2186 
   2187     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2188 
   2189     else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
   2190     if not else_match:
   2191         return
   2192 
   2193     else_indentation = else_match.group('else_indentation')
   2194     inner_indentation = else_indentation + ' ' * 4
   2195 
   2196     previous_lines = clean_lines.elided[:line_number]
   2197     previous_lines.reverse()
   2198     line_offset = 0
   2199     encountered_exit_statement = False
   2200 
   2201     for current_line in previous_lines:
   2202         line_offset -= 1
   2203 
   2204         # Skip not only empty lines but also those with preprocessor directives
   2205         # and goto labels.
   2206         if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line):
   2207             continue
   2208 
   2209         # Skip lines with closing braces on the original indentation level.
   2210         # Even though the styleguide says they should be on the same line as
   2211         # the "else if" statement, we also want to check for instances where
   2212         # the current code does not comply with the coding style. Thus, ignore
   2213         # these lines and proceed to the line before that.
   2214         if current_line == else_indentation + '}':
   2215             continue
   2216 
   2217         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2218         current_indentation = current_indentation_match.group('indentation')
   2219         remaining_line = current_indentation_match.group('remaining_line')
   2220 
   2221         # As we're going up the lines, the first real statement to encounter
   2222         # has to be an exit statement (return, break, continue or goto) -
   2223         # otherwise, this check doesn't apply.
   2224         if not encountered_exit_statement:
   2225             # We only want to find exit statements if they are on exactly
   2226             # the same level of indentation as expected from the code inside
   2227             # the block. If the indentation doesn't strictly match then we
   2228             # might have a nested if or something, which must be ignored.
   2229             if current_indentation != inner_indentation:
   2230                 break
   2231             if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line):
   2232                 encountered_exit_statement = True
   2233                 continue
   2234             break
   2235 
   2236         # When code execution reaches this point, we've found an exit statement
   2237         # as last statement of the previous block. Now we only need to make
   2238         # sure that the block belongs to an "if", then we can throw an error.
   2239 
   2240         # Skip lines with opening braces on the original indentation level,
   2241         # similar to the closing braces check above. ("if (condition)\n{")
   2242         if current_line == else_indentation + '{':
   2243             continue
   2244 
   2245         # Skip everything that's further indented than our "else" or "else if".
   2246         if current_indentation.startswith(else_indentation) and current_indentation != else_indentation:
   2247             continue
   2248 
   2249         # So we've got a line with same (or less) indentation. Is it an "if"?
   2250         # If yes: throw an error. If no: don't throw an error.
   2251         # Whatever the outcome, this is the end of our loop.
   2252         if match(r'if\s*\(', remaining_line):
   2253             if else_match.start('else') != -1:
   2254                 error(line_number + line_offset, 'readability/control_flow', 4,
   2255                       'An else statement can be removed when the prior "if" '
   2256                       'concludes with a return, break, continue or goto statement.')
   2257             else:
   2258                 error(line_number + line_offset, 'readability/control_flow', 4,
   2259                       'An else if statement should be written as an if statement '
   2260                       'when the prior "if" concludes with a return, break, '
   2261                       'continue or goto statement.')
   2262         break
   2263 
   2264 
   2265 def replaceable_check(operator, macro, line):
   2266     """Determine whether a basic CHECK can be replaced with a more specific one.
   2267 
   2268     For example suggest using CHECK_EQ instead of CHECK(a == b) and
   2269     similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   2270 
   2271     Args:
   2272       operator: The C++ operator used in the CHECK.
   2273       macro: The CHECK or EXPECT macro being called.
   2274       line: The current source line.
   2275 
   2276     Returns:
   2277       True if the CHECK can be replaced with a more specific one.
   2278     """
   2279 
   2280     # This matches decimal and hex integers, strings, and chars (in that order).
   2281     match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   2282 
   2283     # Expression to match two sides of the operator with something that
   2284     # looks like a literal, since CHECK(x == iterator) won't compile.
   2285     # This means we can't catch all the cases where a more specific
   2286     # CHECK is possible, but it's less annoying than dealing with
   2287     # extraneous warnings.
   2288     match_this = (r'\s*' + macro + r'\((\s*' +
   2289                   match_constant + r'\s*' + operator + r'[^<>].*|'
   2290                   r'.*[^<>]' + operator + r'\s*' + match_constant +
   2291                   r'\s*\))')
   2292 
   2293     # Don't complain about CHECK(x == NULL) or similar because
   2294     # CHECK_EQ(x, NULL) won't compile (requires a cast).
   2295     # Also, don't complain about more complex boolean expressions
   2296     # involving && or || such as CHECK(a == b || c == d).
   2297     return match(match_this, line) and not search(r'NULL|&&|\|\|', line)
   2298 
   2299 
   2300 def check_check(clean_lines, line_number, error):
   2301     """Checks the use of CHECK and EXPECT macros.
   2302 
   2303     Args:
   2304       clean_lines: A CleansedLines instance containing the file.
   2305       line_number: The number of the line to check.
   2306       error: The function to call with any errors found.
   2307     """
   2308 
   2309     # Decide the set of replacement macros that should be suggested
   2310     raw_lines = clean_lines.raw_lines
   2311     current_macro = ''
   2312     for macro in _CHECK_MACROS:
   2313         if raw_lines[line_number].find(macro) >= 0:
   2314             current_macro = macro
   2315             break
   2316     if not current_macro:
   2317         # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   2318         return
   2319 
   2320     line = clean_lines.elided[line_number]        # get rid of comments and strings
   2321 
   2322     # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   2323     for operator in ['==', '!=', '>=', '>', '<=', '<']:
   2324         if replaceable_check(operator, current_macro, line):
   2325             error(line_number, 'readability/check', 2,
   2326                   'Consider using %s instead of %s(a %s b)' % (
   2327                       _CHECK_REPLACEMENT[current_macro][operator],
   2328                       current_macro, operator))
   2329             break
   2330 
   2331 
   2332 def check_for_comparisons_to_zero(clean_lines, line_number, error):
   2333     # Get the line without comments and strings.
   2334     line = clean_lines.elided[line_number]
   2335 
   2336     # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
   2337     if search(r'[=!]=\s*(NULL|0|true|false)\W', line) or search(r'\W(NULL|0|true|false)\s*[=!]=', line):
   2338         error(line_number, 'readability/comparison_to_zero', 5,
   2339               'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
   2340 
   2341 
   2342 def check_for_null(clean_lines, line_number, file_state, error):
   2343     # This check doesn't apply to C or Objective-C implementation files.
   2344     if file_state.is_c_or_objective_c():
   2345         return
   2346 
   2347     line = clean_lines.elided[line_number]
   2348 
   2349     # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372.
   2350     if search(r'\bg(_[a-z]+)+\b', line):
   2351         return
   2352 
   2353     # Don't warn about NULL usage in gst_*_many(). See Bug 39740
   2354     if search(r'\bgst_\w+_many\b', line):
   2355         return
   2356 
   2357     # Don't warn about NULL usage in g_str{join,concat}(). See Bug 34834
   2358     if search(r'\bg_str(join|concat)\b', line):
   2359         return
   2360 
   2361     # Don't warn about NULL usage in gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090.
   2362     if search(r'\bgdk_pixbuf_save_to\w+\b', line):
   2363         return
   2364 
   2365     # Don't warn about NULL usage in gtk_widget_style_get(). See Bug 51758.
   2366     if search(r'\bgtk_widget_style_get\(\w+\b', line):
   2367         return
   2368 
   2369     if search(r'\bNULL\b', line):
   2370         error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
   2371         return
   2372 
   2373     line = clean_lines.raw_lines[line_number]
   2374     # See if NULL occurs in any comments in the line. If the search for NULL using the raw line
   2375     # matches, then do the check with strings collapsed to avoid giving errors for
   2376     # NULLs occurring in strings.
   2377     if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)):
   2378         error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).')
   2379 
   2380 def get_line_width(line):
   2381     """Determines the width of the line in column positions.
   2382 
   2383     Args:
   2384       line: A string, which may be a Unicode string.
   2385 
   2386     Returns:
   2387       The width of the line in column positions, accounting for Unicode
   2388       combining characters and wide characters.
   2389     """
   2390     if isinstance(line, unicode):
   2391         width = 0
   2392         for c in unicodedata.normalize('NFC', line):
   2393             if unicodedata.east_asian_width(c) in ('W', 'F'):
   2394                 width += 2
   2395             elif not unicodedata.combining(c):
   2396                 width += 1
   2397         return width
   2398     return len(line)
   2399 
   2400 
   2401 def check_style(clean_lines, line_number, file_extension, class_state, file_state, error):
   2402     """Checks rules from the 'C++ style rules' section of cppguide.html.
   2403 
   2404     Most of these rules are hard to test (naming, comment style), but we
   2405     do what we can.  In particular we check for 4-space indents, line lengths,
   2406     tab usage, spaces inside code, etc.
   2407 
   2408     Args:
   2409       clean_lines: A CleansedLines instance containing the file.
   2410       line_number: The number of the line to check.
   2411       file_extension: The extension (without the dot) of the filename.
   2412       class_state: A _ClassState instance which maintains information about
   2413                    the current stack of nested class declarations being parsed.
   2414       file_state: A _FileState instance which maintains information about
   2415                   the state of things in the file.
   2416       error: The function to call with any errors found.
   2417     """
   2418 
   2419     raw_lines = clean_lines.raw_lines
   2420     line = raw_lines[line_number]
   2421 
   2422     if line.find('\t') != -1:
   2423         error(line_number, 'whitespace/tab', 1,
   2424               'Tab found; better to use spaces')
   2425 
   2426     # One or three blank spaces at the beginning of the line is weird; it's
   2427     # hard to reconcile that with 4-space indents.
   2428     # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
   2429     # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
   2430     # if(RLENGTH > 20) complain = 0;
   2431     # if(match($0, " +(error|private|public|protected):")) complain = 0;
   2432     # if(match(prev, "&& *$")) complain = 0;
   2433     # if(match(prev, "\\|\\| *$")) complain = 0;
   2434     # if(match(prev, "[\",=><] *$")) complain = 0;
   2435     # if(match($0, " <<")) complain = 0;
   2436     # if(match(prev, " +for \\(")) complain = 0;
   2437     # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
   2438     initial_spaces = 0
   2439     cleansed_line = clean_lines.elided[line_number]
   2440     while initial_spaces < len(line) and line[initial_spaces] == ' ':
   2441         initial_spaces += 1
   2442     if line and line[-1].isspace():
   2443         error(line_number, 'whitespace/end_of_line', 4,
   2444               'Line ends in whitespace.  Consider deleting these extra spaces.')
   2445     # There are certain situations we allow one space, notably for labels
   2446     elif ((initial_spaces >= 1 and initial_spaces <= 3)
   2447           and not match(r'\s*\w+\s*:\s*$', cleansed_line)):
   2448         error(line_number, 'whitespace/indent', 3,
   2449               'Weird number of spaces at line-start.  '
   2450               'Are you using a 4-space indent?')
   2451     # Labels should always be indented at least one space.
   2452     elif not initial_spaces and line[:2] != '//':
   2453         label_match = match(r'(?P<label>[^:]+):\s*$', line)
   2454 
   2455         if label_match:
   2456             label = label_match.group('label')
   2457             # Only throw errors for stuff that is definitely not a goto label,
   2458             # because goto labels can in fact occur at the start of the line.
   2459             if label in ['public', 'private', 'protected'] or label.find(' ') != -1:
   2460                 error(line_number, 'whitespace/labels', 4,
   2461                       'Labels should always be indented at least one space.  '
   2462                       'If this is a member-initializer list in a constructor, '
   2463                       'the colon should be on the line after the definition header.')
   2464 
   2465     if (cleansed_line.count(';') > 1
   2466         # for loops are allowed two ;'s (and may run over two lines).
   2467         and cleansed_line.find('for') == -1
   2468         and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1
   2469              or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1)
   2470         # It's ok to have many commands in a switch case that fits in 1 line
   2471         and not ((cleansed_line.find('case ') != -1
   2472                   or cleansed_line.find('default:') != -1)
   2473                  and cleansed_line.find('break;') != -1)
   2474         # Also it's ok to have many commands in trivial single-line accessors in class definitions.
   2475         and not (match(r'.*\(.*\).*{.*.}', line)
   2476                  and class_state.classinfo_stack
   2477                  and line.count('{') == line.count('}'))
   2478         and not cleansed_line.startswith('#define ')):
   2479         error(line_number, 'whitespace/newline', 4,
   2480               'More than one command on the same line')
   2481 
   2482     if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'):
   2483         error(line_number, 'whitespace/operators', 4,
   2484               'Boolean expressions that span multiple lines should have their '
   2485               'operators on the left side of the line instead of the right side.')
   2486 
   2487     # Some more style checks
   2488     check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
   2489     check_using_std(clean_lines, line_number, file_state, error)
   2490     check_max_min_macros(clean_lines, line_number, file_state, error)
   2491     check_switch_indentation(clean_lines, line_number, error)
   2492     check_braces(clean_lines, line_number, error)
   2493     check_exit_statement_simplifications(clean_lines, line_number, error)
   2494     check_spacing(file_extension, clean_lines, line_number, error)
   2495     check_check(clean_lines, line_number, error)
   2496     check_for_comparisons_to_zero(clean_lines, line_number, error)
   2497     check_for_null(clean_lines, line_number, file_state, error)
   2498 
   2499 
# Matches a quoted include of a '.h' file whose quoted text contains no '/',
# e.g. '#include "foo.h"'.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the text between the delimiters (the included path).
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
# A '.' also ends the component, which is why the extension is excluded.
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2508 
   2509 
   2510 def _drop_common_suffixes(filename):
   2511     """Drops common suffixes like _test.cpp or -inl.h from filename.
   2512 
   2513     For example:
   2514       >>> _drop_common_suffixes('foo/foo-inl.h')
   2515       'foo/foo'
   2516       >>> _drop_common_suffixes('foo/bar/foo.cpp')
   2517       'foo/bar/foo'
   2518       >>> _drop_common_suffixes('foo/foo_internal.h')
   2519       'foo/foo'
   2520       >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
   2521       'foo/foo_unusualinternal'
   2522 
   2523     Args:
   2524       filename: The input filename.
   2525 
   2526     Returns:
   2527       The filename with the common suffix removed.
   2528     """
   2529     for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
   2530                    'inl.h', 'impl.h', 'internal.h'):
   2531         if (filename.endswith(suffix) and len(filename) > len(suffix)
   2532             and filename[-len(suffix) - 1] in ('-', '_')):
   2533             return filename[:-len(suffix) - 1]
   2534     return os.path.splitext(filename)[0]
   2535 
   2536 
   2537 def _classify_include(filename, include, is_system, include_state):
   2538     """Figures out what kind of header 'include' is.
   2539 
   2540     Args:
   2541       filename: The current file cpp_style is running over.
   2542       include: The path to a #included file.
   2543       is_system: True if the #include used <> rather than "".
   2544       include_state: An _IncludeState instance in which the headers are inserted.
   2545 
   2546     Returns:
   2547       One of the _XXX_HEADER constants.
   2548 
   2549     For example:
   2550       >>> _classify_include('foo.cpp', 'config.h', False)
   2551       _CONFIG_HEADER
   2552       >>> _classify_include('foo.cpp', 'foo.h', False)
   2553       _PRIMARY_HEADER
   2554       >>> _classify_include('foo.cpp', 'bar.h', False)
   2555       _OTHER_HEADER
   2556     """
   2557 
   2558     # If it is a system header we know it is classified as _OTHER_HEADER.
   2559     if is_system:
   2560         return _OTHER_HEADER
   2561 
   2562     # If the include is named config.h then this is WebCore/config.h.
   2563     if include == "config.h":
   2564         return _CONFIG_HEADER
   2565 
   2566     # There cannot be primary includes in header files themselves. Only an
   2567     # include exactly matches the header filename will be is flagged as
   2568     # primary, so that it triggers the "don't include yourself" check.
   2569     if filename.endswith('.h') and filename != include:
   2570         return _OTHER_HEADER;
   2571 
   2572     # Qt's moc files do not follow the naming and ordering rules, so they should be skipped
   2573     if include.startswith('moc_') and include.endswith('.cpp'):
   2574         return _MOC_HEADER
   2575 
   2576     if include.endswith('.moc'):
   2577         return _MOC_HEADER
   2578 
   2579     # If the target file basename starts with the include we're checking
   2580     # then we consider it the primary header.
   2581     target_base = FileInfo(filename).base_name()
   2582     include_base = FileInfo(include).base_name()
   2583 
   2584     # If we haven't encountered a primary header, then be lenient in checking.
   2585     if not include_state.visited_primary_section() and target_base.find(include_base) != -1:
   2586         return _PRIMARY_HEADER
   2587     # If we already encountered a primary header, perform a strict comparison.
   2588     # In case the two filename bases are the same then the above lenient check
   2589     # probably was a false positive.
   2590     elif include_state.visited_primary_section() and target_base == include_base:
   2591         if include == "ResourceHandleWin.h":
   2592             # FIXME: Thus far, we've only seen one example of these, but if we
   2593             # start to see more, please consider generalizing this check
   2594             # somehow.
   2595             return _OTHER_HEADER
   2596         return _PRIMARY_HEADER
   2597 
   2598     return _OTHER_HEADER
   2599 
   2600 
   2601 def _does_primary_header_exist(filename):
   2602     """Return a primary header file name for a file, or empty string
   2603     if the file is not source file or primary header does not exist.
   2604     """
   2605     fileinfo = FileInfo(filename)
   2606     if not fileinfo.is_source():
   2607         return False
   2608     primary_header = fileinfo.no_extension() + ".h"
   2609     return os.path.isfile(primary_header)
   2610 
   2611 
   2612 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
   2613     """Check rules that are applicable to #include lines.
   2614 
   2615     Strings on #include lines are NOT removed from elided line, to make
   2616     certain tasks easier. However, to prevent false positives, checks
   2617     applicable to #include lines in CheckLanguage must be put here.
   2618 
   2619     Args:
   2620       filename: The name of the current file.
   2621       file_extension: The current file extension, without the leading dot.
   2622       clean_lines: A CleansedLines instance containing the file.
   2623       line_number: The number of the line to check.
   2624       include_state: An _IncludeState instance in which the headers are inserted.
   2625       error: The function to call with any errors found.
   2626     """
   2627     # FIXME: For readability or as a possible optimization, consider
   2628     #        exiting early here by checking whether the "build/include"
   2629     #        category should be checked for the given filename.  This
   2630     #        may involve having the error handler classes expose a
   2631     #        should_check() method, in addition to the usual __call__
   2632     #        method.
   2633     line = clean_lines.lines[line_number]
   2634 
   2635     matched = _RE_PATTERN_INCLUDE.search(line)
   2636     if not matched:
   2637         return
   2638 
   2639     include = matched.group(2)
   2640     is_system = (matched.group(1) == '<')
   2641 
   2642     # Look for any of the stream classes that are part of standard C++.
   2643     if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   2644         error(line_number, 'readability/streams', 3,
   2645               'Streams are highly discouraged.')
   2646 
   2647     # Look for specific includes to fix.
   2648     if include.startswith('wtf/') and not is_system:
   2649         error(line_number, 'build/include', 4,
   2650               'wtf includes should be <wtf/file.h> instead of "wtf/file.h".')
   2651 
   2652     duplicate_header = include in include_state
   2653     if duplicate_header:
   2654         error(line_number, 'build/include', 4,
   2655               '"%s" already included at %s:%s' %
   2656               (include, filename, include_state[include]))
   2657     else:
   2658         include_state[include] = line_number
   2659 
   2660     header_type = _classify_include(filename, include, is_system, include_state)
   2661     primary_header_exists = _does_primary_header_exist(filename)
   2662     include_state.header_types[line_number] = header_type
   2663 
   2664     # Only proceed if this isn't a duplicate header.
   2665     if duplicate_header:
   2666         return
   2667 
   2668     # We want to ensure that headers appear in the right order:
   2669     # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
   2670     # 2) for header files: alphabetically sorted
   2671     # The include_state object keeps track of the last type seen
   2672     # and complains if the header types are out of order or missing.
   2673     error_message = include_state.check_next_include_order(header_type,
   2674                                                            file_extension == "h",
   2675                                                            primary_header_exists)
   2676 
   2677     # Check to make sure we have a blank line after primary header.
   2678     if not error_message and header_type == _PRIMARY_HEADER:
   2679          next_line = clean_lines.raw_lines[line_number + 1]
   2680          if not is_blank_line(next_line):
   2681             error(line_number, 'build/include_order', 4,
   2682                   'You should add a blank line after implementation file\'s own header.')
   2683 
   2684     # Check to make sure all headers besides config.h and the primary header are
   2685     # alphabetically sorted. Skip Qt's moc files.
   2686     if not error_message and header_type == _OTHER_HEADER:
   2687          previous_line_number = line_number - 1;
   2688          previous_line = clean_lines.lines[previous_line_number]
   2689          previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   2690          while (not previous_match and previous_line_number > 0
   2691                 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
   2692             previous_line_number -= 1;
   2693             previous_line = clean_lines.lines[previous_line_number]
   2694             previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   2695          if previous_match:
   2696             previous_header_type = include_state.header_types[previous_line_number]
   2697             if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
   2698                 error(line_number, 'build/include_order', 4,
   2699                       'Alphabetical sorting problem.')
   2700 
   2701     if error_message:
   2702         if file_extension == 'h':
   2703             error(line_number, 'build/include_order', 4,
   2704                   '%s Should be: alphabetically sorted.' %
   2705                   error_message)
   2706         else:
   2707             error(line_number, 'build/include_order', 4,
   2708                   '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
   2709                   error_message)
   2710 
   2711 
   2712 def check_language(filename, clean_lines, line_number, file_extension, include_state,
   2713                    file_state, error):
   2714     """Checks rules from the 'C++ language rules' section of cppguide.html.
   2715 
   2716     Some of these rules are hard to test (function overloading, using
   2717     uint32 inappropriately), but we do the best we can.
   2718 
   2719     Args:
   2720       filename: The name of the current file.
   2721       clean_lines: A CleansedLines instance containing the file.
   2722       line_number: The number of the line to check.
   2723       file_extension: The extension (without the dot) of the filename.
   2724       include_state: An _IncludeState instance in which the headers are inserted.
   2725       file_state: A _FileState instance which maintains information about
   2726                   the state of things in the file.
   2727       error: The function to call with any errors found.
   2728     """
   2729     # If the line is empty or consists of entirely a comment, no need to
   2730     # check it.
   2731     line = clean_lines.elided[line_number]
   2732     if not line:
   2733         return
   2734 
   2735     matched = _RE_PATTERN_INCLUDE.search(line)
   2736     if matched:
   2737         check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
   2738         return
   2739 
   2740     # FIXME: figure out if they're using default arguments in fn proto.
   2741 
   2742     # Check to see if they're using an conversion function cast.
   2743     # I just try to capture the most common basic types, though there are more.
   2744     # Parameterless conversion functions, such as bool(), are allowed as they are
   2745     # probably a member operator declaration or default constructor.
   2746     matched = search(
   2747         r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   2748     if matched:
   2749         # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   2750         # where type may be float(), int(string), etc.  Without context they are
   2751         # virtually indistinguishable from int(x) casts.
   2752         if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
   2753             error(line_number, 'readability/casting', 4,
   2754                   'Using deprecated casting style.  '
   2755                   'Use static_cast<%s>(...) instead' %
   2756                   matched.group(1))
   2757 
   2758     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   2759                        'static_cast',
   2760                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
   2761                        error)
   2762     # This doesn't catch all cases.  Consider (const char * const)"hello".
   2763     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   2764                        'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   2765 
   2766     # In addition, we look for people taking the address of a cast.  This
   2767     # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   2768     # point where you think.
   2769     if search(
   2770         r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   2771         error(line_number, 'runtime/casting', 4,
   2772               ('Are you taking an address of a cast?  '
   2773                'This is dangerous: could be a temp var.  '
   2774                'Take the address before doing the cast, rather than after'))
   2775 
   2776     # Check for people declaring static/global STL strings at the top level.
   2777     # This is dangerous because the C++ language does not guarantee that
   2778     # globals with constructors are initialized before the first access.
   2779     matched = match(
   2780         r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   2781         line)
   2782     # Make sure it's not a function.
   2783     # Function template specialization looks like: "string foo<Type>(...".
   2784     # Class template definitions look like: "string Foo<Type>::Method(...".
   2785     if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   2786                              matched.group(3)):
   2787         error(line_number, 'runtime/string', 4,
   2788               'For a static/global string constant, use a C style string instead: '
   2789               '"%schar %s[]".' %
   2790               (matched.group(1), matched.group(2)))
   2791 
   2792     # Check that we're not using RTTI outside of testing code.
   2793     if search(r'\bdynamic_cast<', line):
   2794         error(line_number, 'runtime/rtti', 5,
   2795               'Do not use dynamic_cast<>.  If you need to cast within a class '
   2796               "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   2797               'RTTI.')
   2798 
   2799     if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   2800         error(line_number, 'runtime/init', 4,
   2801               'You seem to be initializing a member variable with itself.')
   2802 
   2803     if file_extension == 'h':
   2804         # FIXME: check that 1-arg constructors are explicit.
   2805         #        How to tell it's a constructor?
   2806         #        (handled in check_for_non_standard_constructs for now)
   2807         pass
   2808 
   2809     # Check if people are using the verboten C basic types.  The only exception
   2810     # we regularly allow is "unsigned short port" for port.
   2811     if search(r'\bshort port\b', line):
   2812         if not search(r'\bunsigned short port\b', line):
   2813             error(line_number, 'runtime/int', 4,
   2814                   'Use "unsigned short" for ports, not "short"')
   2815 
   2816     # When snprintf is used, the second argument shouldn't be a literal.
   2817     matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   2818     if matched:
   2819         error(line_number, 'runtime/printf', 3,
   2820               'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   2821               'to snprintf.' % (matched.group(1), matched.group(2)))
   2822 
   2823     # Check if some verboten C functions are being used.
   2824     if search(r'\bsprintf\b', line):
   2825         error(line_number, 'runtime/printf', 5,
   2826               'Never use sprintf.  Use snprintf instead.')
   2827     matched = search(r'\b(strcpy|strcat)\b', line)
   2828     if matched:
   2829         error(line_number, 'runtime/printf', 4,
   2830               'Almost always, snprintf is better than %s' % matched.group(1))
   2831 
   2832     if search(r'\bsscanf\b', line):
   2833         error(line_number, 'runtime/printf', 1,
   2834               'sscanf can be ok, but is slow and can overflow buffers.')
   2835 
   2836     # Check for suspicious usage of "if" like
   2837     # } if (a == b) {
   2838     if search(r'\}\s*if\s*\(', line):
   2839         error(line_number, 'readability/braces', 4,
   2840               'Did you mean "else if"? If not, start a new line for "if".')
   2841 
   2842     # Check for potential format string bugs like printf(foo).
   2843     # We constrain the pattern not to pick things like DocidForPrintf(foo).
   2844     # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   2845     matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
   2846     if matched:
   2847         error(line_number, 'runtime/printf', 4,
   2848               'Potential format string bug. Do %s("%%s", %s) instead.'
   2849               % (matched.group(1), matched.group(2)))
   2850 
   2851     # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   2852     matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   2853     if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
   2854         error(line_number, 'runtime/memset', 4,
   2855               'Did you mean "memset(%s, 0, %s)"?'
   2856               % (matched.group(1), matched.group(2)))
   2857 
   2858     # Detect variable-length arrays.
   2859     matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   2860     if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
   2861         matched.group(3).find(']') == -1):
   2862         # Split the size using space and arithmetic operators as delimiters.
   2863         # If any of the resulting tokens are not compile time constants then
   2864         # report the error.
   2865         tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3))
   2866         is_const = True
   2867         skip_next = False
   2868         for tok in tokens:
   2869             if skip_next:
   2870                 skip_next = False
   2871                 continue
   2872 
   2873             if search(r'sizeof\(.+\)', tok):
   2874                 continue
   2875             if search(r'arraysize\(\w+\)', tok):
   2876                 continue
   2877 
   2878             tok = tok.lstrip('(')
   2879             tok = tok.rstrip(')')
   2880             if not tok:
   2881                 continue
   2882             if match(r'\d+', tok):
   2883                 continue
   2884             if match(r'0[xX][0-9a-fA-F]+', tok):
   2885                 continue
   2886             if match(r'k[A-Z0-9]\w*', tok):
   2887                 continue
   2888             if match(r'(.+::)?k[A-Z0-9]\w*', tok):
   2889                 continue
   2890             if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
   2891                 continue
   2892             # A catch all for tricky sizeof cases, including 'sizeof expression',
   2893             # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   2894             # requires skipping the next token becasue we split on ' ' and '*'.
   2895             if tok.startswith('sizeof'):
   2896                 skip_next = True
   2897                 continue
   2898             is_const = False
   2899             break
   2900         if not is_const:
   2901             error(line_number, 'runtime/arrays', 1,
   2902                   'Do not use variable-length arrays.  Use an appropriately named '
   2903                   "('k' followed by CamelCase) compile-time constant for the size.")
   2904 
   2905     # Check for use of unnamed namespaces in header files.  Registration
   2906     # macros are typically OK, so we allow use of "namespace {" on lines
   2907     # that end with backslashes.
   2908     if (file_extension == 'h'
   2909         and search(r'\bnamespace\s*{', line)
   2910         and line[-1] != '\\'):
   2911         error(line_number, 'build/namespaces', 4,
   2912               'Do not use unnamed namespaces in header files.  See '
   2913               'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   2914               ' for more information.')
   2915 
   2916     check_identifier_name_in_declaration(filename, line_number, line, file_state, error)
   2917 
   2918 
   2919 def check_identifier_name_in_declaration(filename, line_number, line, file_state, error):
   2920     """Checks if identifier names contain any underscores.
   2921 
   2922     As identifiers in libraries we are using have a bunch of
   2923     underscores, we only warn about the declarations of identifiers
   2924     and don't check use of identifiers.
   2925 
   2926     Args:
   2927       filename: The name of the current file.
   2928       line_number: The number of the line to check.
   2929       line: The line of code to check.
   2930       file_state: A _FileState instance which maintains information about
   2931                   the state of things in the file.
   2932       error: The function to call with any errors found.
   2933     """
   2934     # We don't check a return statement.
   2935     if match(r'\s*(return|delete)\b', line):
   2936         return
   2937 
   2938     # Basically, a declaration is a type name followed by whitespaces
   2939     # followed by an identifier. The type name can be complicated
   2940     # due to type adjectives and templates. We remove them first to
   2941     # simplify the process to find declarations of identifiers.
   2942 
   2943     # Convert "long long", "long double", and "long long int" to
   2944     # simple types, but don't remove simple "long".
   2945     line = sub(r'long (long )?(?=long|double|int)', '', line)
   2946     # Convert unsigned/signed types to simple types, too.
   2947     line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line)
   2948     line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)
   2949 
   2950     # Remove "new" and "new (expr)" to simplify, too.
   2951     line = sub(r'new\s*(\([^)]*\))?', '', line)
   2952 
   2953     # Remove all template parameters by removing matching < and >.
   2954     # Loop until no templates are removed to remove nested templates.
   2955     while True:
   2956         line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
   2957         if not number_of_replacements:
   2958             break
   2959 
   2960     # Declarations of local variables can be in condition expressions
   2961     # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
   2962     # We remove the keywords and the first parenthesis.
   2963     #
   2964     # Declarations in "while", "if", and "switch" are different from
   2965     # other declarations in two aspects:
   2966     #
   2967     # - There can be only one declaration between the parentheses.
   2968     #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
   2969     # - The variable must be initialized.
   2970     #   (i.e., you cannot write "if (int i) {}")
   2971     #
   2972     # and we will need different treatments for them.
   2973     line = sub(r'^\s*for\s*\(', '', line)
   2974     line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)
   2975 
   2976     # Detect variable and functions.
   2977     type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
   2978     identifier_regexp = r'(?P<identifier>[\w:]+)'
   2979     maybe_bitfield_regexp = r'(:\s*\d+\s*)?'
   2980     character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
   2981     declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + maybe_bitfield_regexp + character_after_identifier_regexp
   2982     declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
   2983     is_function_arguments = False
   2984     number_of_identifiers = 0
   2985     while True:
   2986         # If we are seeing the first identifier or arguments of a
   2987         # function, there should be a type name before an identifier.
   2988         if not number_of_identifiers or is_function_arguments:
   2989             declaration_regexp = declaration_with_type_regexp
   2990         else:
   2991             declaration_regexp = declaration_without_type_regexp
   2992 
   2993         matched = match(declaration_regexp, line)
   2994         if not matched:
   2995             return
   2996         identifier = matched.group('identifier')
   2997         character_after_identifier = matched.group('character_after_identifier')
   2998 
   2999         # If we removed a non-for-control statement, the character after
   3000         # the identifier should be '='. With this rule, we can avoid
   3001         # warning for cases like "if (val & INT_MAX) {".
   3002         if control_statement and character_after_identifier != '=':
   3003             return
   3004 
   3005         is_function_arguments = is_function_arguments or character_after_identifier == '('
   3006 
   3007         # Remove "m_" and "s_" to allow them.
   3008         modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
   3009         if not file_state.is_objective_c() and modified_identifier.find('_') >= 0:
   3010             # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
   3011             if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0)
   3012                 and not modified_identifier.startswith('tst_')
   3013                 and not modified_identifier.startswith('webkit_dom_object_')
   3014                 and not modified_identifier.startswith('NPN_')
   3015                 and not modified_identifier.startswith('NPP_')
   3016                 and not modified_identifier.startswith('NP_')
   3017                 and not modified_identifier.startswith('qt_')
   3018                 and not modified_identifier.startswith('cairo_')
   3019                 and not modified_identifier.find('::qt_') >= 0
   3020                 and not modified_identifier == "const_iterator"
   3021                 and not modified_identifier == "vm_throw"):
   3022                 error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
   3023 
   3024         # Check for variables named 'l', these are too easy to confuse with '1' in some fonts
   3025         if modified_identifier == 'l':
   3026             error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")
   3027 
   3028         # There can be only one declaration in non-for-control statements.
   3029         if control_statement:
   3030             return
   3031         # We should continue checking if this is a function
   3032         # declaration because we need to check its arguments.
   3033         # Also, we need to check multiple declarations.
   3034         if character_after_identifier != '(' and character_after_identifier != ',':
   3035             return
   3036 
   3037         number_of_identifiers += 1
   3038         line = line[matched.end():]
   3039 
   3040 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
   3041                        error):
   3042     """Checks for a C-style cast by looking for the pattern.
   3043 
   3044     This also handles sizeof(type) warnings, due to similarity of content.
   3045 
   3046     Args:
   3047       line_number: The number of the line to check.
   3048       line: The line of code to check.
   3049       raw_line: The raw line of code to check, with comments.
   3050       cast_type: The string for the C++ cast to recommend.  This is either
   3051                  reinterpret_cast or static_cast, depending.
   3052       pattern: The regular expression used to find C-style casts.
   3053       error: The function to call with any errors found.
   3054     """
   3055     matched = search(pattern, line)
   3056     if not matched:
   3057         return
   3058 
   3059     # e.g., sizeof(int)
   3060     sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1])
   3061     if sizeof_match:
   3062         error(line_number, 'runtime/sizeof', 1,
   3063               'Using sizeof(type).  Use sizeof(varname) instead if possible')
   3064         return
   3065 
   3066     remainder = line[matched.end(0):]
   3067 
   3068     # The close paren is for function pointers as arguments to a function.
   3069     # eg, void foo(void (*bar)(int));
   3070     # The semicolon check is a more basic function check; also possibly a
   3071     # function pointer typedef.
   3072     # eg, void foo(int); or void foo(int) const;
   3073     # The equals check is for function pointer assignment.
   3074     # eg, void *(*foo)(int) = ...
   3075     #
   3076     # Right now, this will only catch cases where there's a single argument, and
   3077     # it's unnamed.  It should probably be expanded to check for multiple
   3078     # arguments with some unnamed.
   3079     function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder)
   3080     if function_match:
   3081         if (not function_match.group(3)
   3082             or function_match.group(3) == ';'
   3083             or raw_line.find('/*') < 0):
   3084             error(line_number, 'readability/function', 3,
   3085                   'All parameters should be named in a function')
   3086         return
   3087 
   3088     # At this point, all that should be left is actual casts.
   3089     error(line_number, 'readability/casting', 4,
   3090           'Using C-style cast.  Use %s<%s>(...) instead' %
   3091           (cast_type, matched.group(1)))
   3092 
   3093 
   3094 _HEADERS_CONTAINING_TEMPLATES = (
   3095     ('<deque>', ('deque',)),
   3096     ('<functional>', ('unary_function', 'binary_function',
   3097                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   3098                       'negate',
   3099                       'equal_to', 'not_equal_to', 'greater', 'less',
   3100                       'greater_equal', 'less_equal',
   3101                       'logical_and', 'logical_or', 'logical_not',
   3102                       'unary_negate', 'not1', 'binary_negate', 'not2',
   3103                       'bind1st', 'bind2nd',
   3104                       'pointer_to_unary_function',
   3105                       'pointer_to_binary_function',
   3106                       'ptr_fun',
   3107                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   3108                       'mem_fun_ref_t',
   3109                       'const_mem_fun_t', 'const_mem_fun1_t',
   3110                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   3111                       'mem_fun_ref',
   3112                      )),
   3113     ('<limits>', ('numeric_limits',)),
   3114     ('<list>', ('list',)),
   3115     ('<map>', ('map', 'multimap',)),
   3116     ('<memory>', ('allocator',)),
   3117     ('<queue>', ('queue', 'priority_queue',)),
   3118     ('<set>', ('set', 'multiset',)),
   3119     ('<stack>', ('stack',)),
   3120     ('<string>', ('char_traits', 'basic_string',)),
   3121     ('<utility>', ('pair',)),
   3122     ('<vector>', ('vector',)),
   3123 
   3124     # gcc extensions.
   3125     # Note: std::hash is their hash, ::hash is our hash
   3126     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   3127     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   3128     ('<slist>', ('slist',)),
   3129     )
   3130 
   3131 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
   3132     # We can trust with reasonable confidence that map gives us pair<>, too.
   3133     'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
   3134 }
   3135 
   3136 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   3137 
   3138 _re_pattern_algorithm_header = []
   3139 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   3140                   'transform'):
   3141     # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   3142     # type::max().
   3143     _re_pattern_algorithm_header.append(
   3144         (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   3145          _template,
   3146          '<algorithm>'))
   3147 
   3148 _re_pattern_templates = []
   3149 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   3150     for _template in _templates:
   3151         _re_pattern_templates.append(
   3152             (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   3153              _template + '<>',
   3154              _header))
   3155 
   3156 
   3157 def files_belong_to_same_module(filename_cpp, filename_h):
   3158     """Check if these two filenames belong to the same module.
   3159 
   3160     The concept of a 'module' here is a as follows:
   3161     foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
   3162     same 'module' if they are in the same directory.
   3163     some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   3164     to belong to the same module here.
   3165 
   3166     If the filename_cpp contains a longer path than the filename_h, for example,
   3167     '/absolute/path/to/base/sysinfo.cpp', and this file would include
   3168     'base/sysinfo.h', this function also produces the prefix needed to open the
   3169     header. This is used by the caller of this function to more robustly open the
   3170     header file. We don't have access to the real include paths in this context,
   3171     so we need this guesswork here.
   3172 
   3173     Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
   3174     according to this implementation. Because of this, this function gives
   3175     some false positives. This should be sufficiently rare in practice.
   3176 
   3177     Args:
   3178       filename_cpp: is the path for the .cpp file
   3179       filename_h: is the path for the header path
   3180 
   3181     Returns:
   3182       Tuple with a bool and a string:
   3183       bool: True if filename_cpp and filename_h belong to the same module.
   3184       string: the additional prefix needed to open the header file.
   3185     """
   3186 
   3187     if not filename_cpp.endswith('.cpp'):
   3188         return (False, '')
   3189     filename_cpp = filename_cpp[:-len('.cpp')]
   3190     if filename_cpp.endswith('_unittest'):
   3191         filename_cpp = filename_cpp[:-len('_unittest')]
   3192     elif filename_cpp.endswith('_test'):
   3193         filename_cpp = filename_cpp[:-len('_test')]
   3194     filename_cpp = filename_cpp.replace('/public/', '/')
   3195     filename_cpp = filename_cpp.replace('/internal/', '/')
   3196 
   3197     if not filename_h.endswith('.h'):
   3198         return (False, '')
   3199     filename_h = filename_h[:-len('.h')]
   3200     if filename_h.endswith('-inl'):
   3201         filename_h = filename_h[:-len('-inl')]
   3202     filename_h = filename_h.replace('/public/', '/')
   3203     filename_h = filename_h.replace('/internal/', '/')
   3204 
   3205     files_belong_to_same_module = filename_cpp.endswith(filename_h)
   3206     common_path = ''
   3207     if files_belong_to_same_module:
   3208         common_path = filename_cpp[:-len(filename_h)]
   3209     return files_belong_to_same_module, common_path
   3210 
   3211 
   3212 def update_include_state(filename, include_state, io=codecs):
   3213     """Fill up the include_state with new includes found from the file.
   3214 
   3215     Args:
   3216       filename: the name of the header to read.
   3217       include_state: an _IncludeState instance in which the headers are inserted.
   3218       io: The io factory to use to read the file. Provided for testability.
   3219 
   3220     Returns:
   3221       True if a header was succesfully added. False otherwise.
   3222     """
   3223     io = _unit_test_config.get(INCLUDE_IO_INJECTION_KEY, codecs)
   3224     header_file = None
   3225     try:
   3226         header_file = io.open(filename, 'r', 'utf8', 'replace')
   3227     except IOError:
   3228         return False
   3229     line_number = 0
   3230     for line in header_file:
   3231         line_number += 1
   3232         clean_line = cleanse_comments(line)
   3233         matched = _RE_PATTERN_INCLUDE.search(clean_line)
   3234         if matched:
   3235             include = matched.group(2)
   3236             # The value formatting is cute, but not really used right now.
   3237             # What matters here is that the key is in include_state.
   3238             include_state.setdefault(include, '%s:%d' % (filename, line_number))
   3239     return True
   3240 
   3241 
   3242 def check_for_include_what_you_use(filename, clean_lines, include_state, error):
   3243     """Reports for missing stl includes.
   3244 
   3245     This function will output warnings to make sure you are including the headers
   3246     necessary for the stl containers and functions that you use. We only give one
   3247     reason to include a header. For example, if you use both equal_to<> and
   3248     less<> in a .h file, only one (the latter in the file) of these will be
   3249     reported as a reason to include the <functional>.
   3250 
   3251     Args:
   3252       filename: The name of the current file.
   3253       clean_lines: A CleansedLines instance containing the file.
   3254       include_state: An _IncludeState instance.
   3255       error: The function to call with any errors found.
   3256     """
   3257     required = {}  # A map of header name to line_number and the template entity.
   3258         # Example of required: { '<functional>': (1219, 'less<>') }
   3259 
   3260     for line_number in xrange(clean_lines.num_lines()):
   3261         line = clean_lines.elided[line_number]
   3262         if not line or line[0] == '#':
   3263             continue
   3264 
   3265         # String is special -- it is a non-templatized type in STL.
   3266         if _RE_PATTERN_STRING.search(line):
   3267             required['<string>'] = (line_number, 'string')
   3268 
   3269         for pattern, template, header in _re_pattern_algorithm_header:
   3270             if pattern.search(line):
   3271                 required[header] = (line_number, template)
   3272 
   3273         # The following function is just a speed up, no semantics are changed.
   3274         if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   3275             continue
   3276 
   3277         for pattern, template, header in _re_pattern_templates:
   3278             if pattern.search(line):
   3279                 required[header] = (line_number, template)
   3280 
   3281     # The policy is that if you #include something in foo.h you don't need to
   3282     # include it again in foo.cpp. Here, we will look at possible includes.
   3283     # Let's copy the include_state so it is only messed up within this function.
   3284     include_state = include_state.copy()
   3285 
   3286     # Did we find the header for this file (if any) and succesfully load it?
   3287     header_found = False
   3288 
   3289     # Use the absolute path so that matching works properly.
   3290     abs_filename = os.path.abspath(filename)
   3291 
   3292     # For Emacs's flymake.
   3293     # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
   3294     # by flymake and that file name might end with '_flymake.cpp'. In that case,
   3295     # restore original file name here so that the corresponding header file can be
   3296     # found.
   3297     # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
   3298     # instead of 'foo_flymake.h'
   3299     abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename)
   3300 
   3301     # include_state is modified during iteration, so we iterate over a copy of
   3302     # the keys.
   3303     for header in include_state.keys():  #NOLINT
   3304         (same_module, common_path) = files_belong_to_same_module(abs_filename, header)
   3305         fullpath = common_path + header
   3306         if same_module and update_include_state(fullpath, include_state):
   3307             header_found = True
   3308 
   3309     # If we can't find the header file for a .cpp, assume it's because we don't
   3310     # know where to look. In that case we'll give up as we're not sure they
   3311     # didn't include it in the .h file.
   3312     # FIXME: Do a better job of finding .h files so we are confident that
   3313     #        not having the .h file means there isn't one.
   3314     if filename.endswith('.cpp') and not header_found:
   3315         return
   3316 
   3317     # All the lines have been processed, report the errors found.
   3318     for required_header_unstripped in required:
   3319         template = required[required_header_unstripped][1]
   3320         if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
   3321             headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
   3322             if [True for header in headers if header in include_state]:
   3323                 continue
   3324         if required_header_unstripped.strip('<>"') not in include_state:
   3325             error(required[required_header_unstripped][0],
   3326                   'build/include_what_you_use', 4,
   3327                   'Add #include ' + required_header_unstripped + ' for ' + template)
   3328 
   3329 
   3330 def process_line(filename, file_extension,
   3331                  clean_lines, line, include_state, function_state,
   3332                  class_state, file_state, error):
   3333     """Processes a single line in the file.
   3334 
   3335     Args:
   3336       filename: Filename of the file that is being processed.
   3337       file_extension: The extension (dot not included) of the file.
   3338       clean_lines: An array of strings, each representing a line of the file,
   3339                    with comments stripped.
   3340       line: Number of line being processed.
   3341       include_state: An _IncludeState instance in which the headers are inserted.
   3342       function_state: A _FunctionState instance which counts function lines, etc.
   3343       class_state: A _ClassState instance which maintains information about
   3344                    the current stack of nested class declarations being parsed.
   3345       file_state: A _FileState instance which maintains information about
   3346                   the state of things in the file.
   3347       error: A callable to which errors are reported, which takes arguments:
   3348              line number, error level, and message
   3349 
   3350     """
   3351     raw_lines = clean_lines.raw_lines
   3352     detect_functions(clean_lines, line, function_state, error)
   3353     check_for_function_lengths(clean_lines, line, function_state, error)
   3354     if search(r'\bNOLINT\b', raw_lines[line]):  # ignore nolint lines
   3355         return
   3356     check_function_definition(filename, file_extension, clean_lines, line, function_state, error)
   3357     check_pass_ptr_usage(clean_lines, line, function_state, error)
   3358     check_for_multiline_comments_and_strings(clean_lines, line, error)
   3359     check_style(clean_lines, line, file_extension, class_state, file_state, error)
   3360     check_language(filename, clean_lines, line, file_extension, include_state,
   3361                    file_state, error)
   3362     check_for_non_standard_constructs(clean_lines, line, class_state, error)
   3363     check_posix_threading(clean_lines, line, error)
   3364     check_invalid_increment(clean_lines, line, error)
   3365 
   3366 
   3367 def _process_lines(filename, file_extension, lines, error, min_confidence):
   3368     """Performs lint checks and reports any errors to the given error function.
   3369 
   3370     Args:
   3371       filename: Filename of the file that is being processed.
   3372       file_extension: The extension (dot not included) of the file.
   3373       lines: An array of strings, each representing a line of the file, with the
   3374              last element being empty if the file is termined with a newline.
   3375       error: A callable to which errors are reported, which takes 4 arguments:
   3376     """
   3377     lines = (['// marker so line numbers and indices both start at 1'] + lines +
   3378              ['// marker so line numbers end in a known way'])
   3379 
   3380     include_state = _IncludeState()
   3381     function_state = _FunctionState(min_confidence)
   3382     class_state = _ClassState()
   3383 
   3384     check_for_copyright(lines, error)
   3385 
   3386     if file_extension == 'h':
   3387         check_for_header_guard(filename, lines, error)
   3388 
   3389     remove_multi_line_comments(lines, error)
   3390     clean_lines = CleansedLines(lines)
   3391     file_state = _FileState(clean_lines, file_extension)
   3392     for line in xrange(clean_lines.num_lines()):
   3393         process_line(filename, file_extension, clean_lines, line,
   3394                      include_state, function_state, class_state, file_state, error)
   3395     class_state.check_finished(error)
   3396 
   3397     check_for_include_what_you_use(filename, clean_lines, include_state, error)
   3398 
   3399     # We check here rather than inside process_line so that we see raw
   3400     # lines rather than "cleaned" lines.
   3401     check_for_unicode_replacement_characters(lines, error)
   3402 
   3403     check_for_new_line_at_eof(lines, error)
   3404 
   3405 
   3406 class CppChecker(object):
   3407 
   3408     """Processes C++ lines for checking style."""
   3409 
   3410     # This list is used to--
   3411     #
   3412     # (1) generate an explicit list of all possible categories,
   3413     # (2) unit test that all checked categories have valid names, and
   3414     # (3) unit test that all categories are getting unit tested.
   3415     #
   3416     categories = set([
   3417         'build/class',
   3418         'build/deprecated',
   3419         'build/endif_comment',
   3420         'build/forward_decl',
   3421         'build/header_guard',
   3422         'build/include',
   3423         'build/include_order',
   3424         'build/include_what_you_use',
   3425         'build/namespaces',
   3426         'build/printf_format',
   3427         'build/storage_class',
   3428         'build/using_std',
   3429         'legal/copyright',
   3430         'readability/braces',
   3431         'readability/casting',
   3432         'readability/check',
   3433         'readability/comparison_to_zero',
   3434         'readability/constructors',
   3435         'readability/control_flow',
   3436         'readability/fn_size',
   3437         'readability/function',
   3438         'readability/multiline_comment',
   3439         'readability/multiline_string',
   3440         'readability/parameter_name',
   3441         'readability/naming',
   3442         'readability/null',
   3443         'readability/pass_ptr',
   3444         'readability/streams',
   3445         'readability/todo',
   3446         'readability/utf8',
   3447         'readability/webkit_api',
   3448         'runtime/arrays',
   3449         'runtime/casting',
   3450         'runtime/explicit',
   3451         'runtime/init',
   3452         'runtime/int',
   3453         'runtime/invalid_increment',
   3454         'runtime/max_min_macros',
   3455         'runtime/memset',
   3456         'runtime/printf',
   3457         'runtime/printf_format',
   3458         'runtime/references',
   3459         'runtime/rtti',
   3460         'runtime/sizeof',
   3461         'runtime/string',
   3462         'runtime/threadsafe_fn',
   3463         'runtime/virtual',
   3464         'whitespace/blank_line',
   3465         'whitespace/braces',
   3466         'whitespace/comma',
   3467         'whitespace/comments',
   3468         'whitespace/declaration',
   3469         'whitespace/end_of_line',
   3470         'whitespace/ending_newline',
   3471         'whitespace/indent',
   3472         'whitespace/labels',
   3473         'whitespace/line_length',
   3474         'whitespace/newline',
   3475         'whitespace/operators',
   3476         'whitespace/parens',
   3477         'whitespace/semicolon',
   3478         'whitespace/tab',
   3479         'whitespace/todo',
   3480         ])
   3481 
   3482     def __init__(self, file_path, file_extension, handle_style_error,
   3483                  min_confidence):
   3484         """Create a CppChecker instance.
   3485 
   3486         Args:
   3487           file_extension: A string that is the file extension, without
   3488                           the leading dot.
   3489 
   3490         """
   3491         self.file_extension = file_extension
   3492         self.file_path = file_path
   3493         self.handle_style_error = handle_style_error
   3494         self.min_confidence = min_confidence
   3495 
   3496     # Useful for unit testing.
   3497     def __eq__(self, other):
   3498         """Return whether this CppChecker instance is equal to another."""
   3499         if self.file_extension != other.file_extension:
   3500             return False
   3501         if self.file_path != other.file_path:
   3502             return False
   3503         if self.handle_style_error != other.handle_style_error:
   3504             return False
   3505         if self.min_confidence != other.min_confidence:
   3506             return False
   3507 
   3508         return True
   3509 
   3510     # Useful for unit testing.
   3511     def __ne__(self, other):
   3512         # Python does not automatically deduce __ne__() from __eq__().
   3513         return not self.__eq__(other)
   3514 
   3515     def check(self, lines):
   3516         _process_lines(self.file_path, self.file_extension, lines,
   3517                        self.handle_style_error, self.min_confidence)
   3518 
   3519 
   3520 # FIXME: Remove this function (requires refactoring unit tests).
   3521 def process_file_data(filename, file_extension, lines, error, min_confidence, unit_test_config):
   3522     global _unit_test_config
   3523     _unit_test_config = unit_test_config
   3524     checker = CppChecker(filename, file_extension, error, min_confidence)
   3525     checker.check(lines)
   3526     _unit_test_config = {}
   3527