Home | History | Annotate | Download | only in checkers
      1 # -*- coding: utf-8 -*-
      2 #
      3 # Copyright (C) 2009, 2010, 2012 Google Inc. All rights reserved.
      4 # Copyright (C) 2009 Torch Mobile Inc.
      5 # Copyright (C) 2009 Apple Inc. All rights reserved.
      6 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
      7 #
      8 # Redistribution and use in source and binary forms, with or without
      9 # modification, are permitted provided that the following conditions are
     10 # met:
     11 #
     12 #    * Redistributions of source code must retain the above copyright
     13 # notice, this list of conditions and the following disclaimer.
     14 #    * Redistributions in binary form must reproduce the above
     15 # copyright notice, this list of conditions and the following disclaimer
     16 # in the documentation and/or other materials provided with the
     17 # distribution.
     18 #    * Neither the name of Google Inc. nor the names of its
     19 # contributors may be used to endorse or promote products derived from
     20 # this software without specific prior written permission.
     21 #
     22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     23 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     25 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     26 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     27 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     28 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     29 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     30 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     31 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     32 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     33 
     34 # This is the modified version of Google's cpplint. The original code is
     35 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
     36 
     37 """Support for check-webkit-style."""
     38 
     39 import codecs
     40 import math  # for log
     41 import os
     42 import os.path
     43 import re
     44 import sre_compile
     45 import string
     46 import sys
     47 import unicodedata
     48 
     49 from webkitpy.common.memoized import memoized
     50 
     51 # The key to use to provide a class to fake loading a header file.
     52 INCLUDE_IO_INJECTION_KEY = 'include_header_io'
     53 
     54 # Headers that we consider STL headers.
     55 _STL_HEADERS = frozenset([
     56     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
     57     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
     58     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
     59     'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
     60     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
     61     'utility', 'vector', 'vector.h',
     62     ])
     63 
     64 
     65 # Non-STL C++ system headers.
     66 _CPP_HEADERS = frozenset([
     67     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
     68     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
     69     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
     70     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
     71     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
     72     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
     73     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
     74     'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
     75     'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
     76     'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
     77     'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
     78     'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
     79     'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
     80     ])
     81 
     82 
     83 # Assertion macros.  These are defined in base/logging.h and
     84 # testing/base/gunit.h.  Note that the _M versions need to come first
     85 # for substring matching to work.
     86 _CHECK_MACROS = [
     87     'DCHECK', 'CHECK',
     88     'EXPECT_TRUE_M', 'EXPECT_TRUE',
     89     'ASSERT_TRUE_M', 'ASSERT_TRUE',
     90     'EXPECT_FALSE_M', 'EXPECT_FALSE',
     91     'ASSERT_FALSE_M', 'ASSERT_FALSE',
     92     ]
     93 
     94 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
     95 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
     96 
     97 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
     98                         ('>=', 'GE'), ('>', 'GT'),
     99                         ('<=', 'LE'), ('<', 'LT')]:
    100     _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    101     _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    102     _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    103     _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    104     _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    105     _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    106 
    107 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    108                             ('>=', 'LT'), ('>', 'LE'),
    109                             ('<=', 'GT'), ('<', 'GE')]:
    110     _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    111     _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    112     _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    113     _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    114 
    115 
    116 # These constants define types of headers for use with
    117 # _IncludeState.check_next_include_order().
    118 _CONFIG_HEADER = 0
    119 _PRIMARY_HEADER = 1
    120 _OTHER_HEADER = 2
    121 _MOC_HEADER = 3
    122 
    123 
# A dictionary of items that customize behavior for unit tests. For example,
# INCLUDE_IO_INJECTION_KEY allows providing a custom io class which allows
# for faking a header file.
_unit_test_config = {}


# Compiled regular expressions, keyed by their pattern string.
# The regexp compilation caching is inlined in all regexp functions for
# performance reasons; factoring it out into a separate function turns out
# to be noticeably expensive.
_regexp_compile_cache = {}
    134 
    135 
    136 def match(pattern, s):
    137     """Matches the string with the pattern, caching the compiled regexp."""
    138     if not pattern in _regexp_compile_cache:
    139         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    140     return _regexp_compile_cache[pattern].match(s)
    141 
    142 
    143 def search(pattern, s):
    144     """Searches the string for the pattern, caching the compiled regexp."""
    145     if not pattern in _regexp_compile_cache:
    146         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    147     return _regexp_compile_cache[pattern].search(s)
    148 
    149 
    150 def sub(pattern, replacement, s):
    151     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    152     if not pattern in _regexp_compile_cache:
    153         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    154     return _regexp_compile_cache[pattern].sub(replacement, s)
    155 
    156 
    157 def subn(pattern, replacement, s):
    158     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    159     if not pattern in _regexp_compile_cache:
    160         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    161     return _regexp_compile_cache[pattern].subn(replacement, s)
    162 
    163 
    164 def iteratively_replace_matches_with_char(pattern, char_replacement, s):
    165     """Returns the string with replacement done.
    166 
    167     Every character in the match is replaced with char.
    168     Due to the iterative nature, pattern should not match char or
    169     there will be an infinite loop.
    170 
    171     Example:
    172       pattern = r'<[^>]>' # template parameters
    173       char_replacement =  '_'
    174       s =     'A<B<C, D>>'
    175       Returns 'A_________'
    176 
    177     Args:
    178       pattern: The regex to match.
    179       char_replacement: The character to put in place of every
    180                         character of the match.
    181       s: The string on which to do the replacements.
    182 
    183     Returns:
    184       True, if the given line is blank.
    185     """
    186     while True:
    187         matched = search(pattern, s)
    188         if not matched:
    189             return s
    190         start_match_index = matched.start(0)
    191         end_match_index = matched.end(0)
    192         match_length = end_match_index - start_match_index
    193         s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:]
    194 
    195 
    196 def _find_in_lines(regex, lines, start_position, not_found_position):
    197     """Does a find starting at start position and going forward until
    198     a match is found.
    199 
    200     Returns the position where the regex started.
    201     """
    202     current_row = start_position.row
    203 
    204     # Start with the given row and trim off everything before what should be matched.
    205     current_line = lines[start_position.row][start_position.column:]
    206     starting_offset = start_position.column
    207     while True:
    208         found_match = search(regex, current_line)
    209         if found_match:
    210             return Position(current_row, starting_offset + found_match.start())
    211 
    212         # A match was not found so continue forward.
    213         current_row += 1
    214         starting_offset = 0
    215         if current_row >= len(lines):
    216             return not_found_position
    217         current_line = lines[current_row]
    218 
    219 def _rfind_in_lines(regex, lines, start_position, not_found_position):
    220     """Does a reverse find starting at start position and going backwards until
    221     a match is found.
    222 
    223     Returns the position where the regex ended.
    224     """
    225     # Put the regex in a group and proceed it with a greedy expression that
    226     # matches anything to ensure that we get the last possible match in a line.
    227     last_in_line_regex = r'.*(' + regex + ')'
    228     current_row = start_position.row
    229 
    230     # Start with the given row and trim off everything past what may be matched.
    231     current_line = lines[start_position.row][:start_position.column]
    232     while True:
    233         found_match = match(last_in_line_regex, current_line)
    234         if found_match:
    235             return Position(current_row, found_match.end(1))
    236 
    237         # A match was not found so continue backward.
    238         current_row -= 1
    239         if current_row < 0:
    240             return not_found_position
    241         current_line = lines[current_row]
    242 
    243 
    244 def _convert_to_lower_with_underscores(text):
    245     """Converts all text strings in camelCase or PascalCase to lowers with underscores."""
    246 
    247     # First add underscores before any capital letter followed by a lower case letter
    248     # as long as it is in a word.
    249     # (This put an underscore before Password but not P and A in WPAPassword).
    250     text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text)
    251 
    252     # Next add underscores before capitals at the end of words if it was
    253     # preceeded by lower case letter or number.
    254     # (This puts an underscore before A in isA but not A in CBA).
    255     text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text)
    256 
    257     # Next add underscores when you have a captial letter which is followed by a capital letter
    258     # but is not proceeded by one. (This puts an underscore before A in 'WordADay').
    259     text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text)
    260 
    261     return text.lower()
    262 
    263 
    264 
    265 def _create_acronym(text):
    266     """Creates an acronym for the given text."""
    267     # Removes all lower case letters except those starting words.
    268     text = sub(r'(?<!\b)[a-z]', '', text)
    269     return text.upper()
    270 
    271 
    272 def up_to_unmatched_closing_paren(s):
    273     """Splits a string into two parts up to first unmatched ')'.
    274 
    275     Args:
    276       s: a string which is a substring of line after '('
    277       (e.g., "a == (b + c))").
    278 
    279     Returns:
    280       A pair of strings (prefix before first unmatched ')',
    281       remainder of s after first unmatched ')'), e.g.,
    282       up_to_unmatched_closing_paren("a == (b + c)) { ")
    283       returns "a == (b + c)", " {".
    284       Returns None, None if there is no unmatched ')'
    285 
    286     """
    287     i = 1
    288     for pos, c in enumerate(s):
    289       if c == '(':
    290         i += 1
    291       elif c == ')':
    292         i -= 1
    293         if i == 0:
    294           return s[:pos], s[pos + 1:]
    295     return None, None
    296 
    297 class _IncludeState(dict):
    298     """Tracks line numbers for includes, and the order in which includes appear.
    299 
    300     As a dict, an _IncludeState object serves as a mapping between include
    301     filename and line number on which that file was included.
    302 
    303     Call check_next_include_order() once for each header in the file, passing
    304     in the type constants defined above. Calls in an illegal order will
    305     raise an _IncludeError with an appropriate error message.
    306 
    307     """
    308     # self._section will move monotonically through this set. If it ever
    309     # needs to move backwards, check_next_include_order will raise an error.
    310     _INITIAL_SECTION = 0
    311     _CONFIG_SECTION = 1
    312     _PRIMARY_SECTION = 2
    313     _OTHER_SECTION = 3
    314 
    315     _TYPE_NAMES = {
    316         _CONFIG_HEADER: 'WebCore config.h',
    317         _PRIMARY_HEADER: 'header this file implements',
    318         _OTHER_HEADER: 'other header',
    319         _MOC_HEADER: 'moc file',
    320         }
    321     _SECTION_NAMES = {
    322         _INITIAL_SECTION: "... nothing.",
    323         _CONFIG_SECTION: "WebCore config.h.",
    324         _PRIMARY_SECTION: 'a header this file implements.',
    325         _OTHER_SECTION: 'other header.',
    326         }
    327 
    328     def __init__(self):
    329         dict.__init__(self)
    330         self._section = self._INITIAL_SECTION
    331         self._visited_primary_section = False
    332         self.header_types = dict();
    333 
    334     def visited_primary_section(self):
    335         return self._visited_primary_section
    336 
    337     def check_next_include_order(self, header_type, file_is_header, primary_header_exists):
    338         """Returns a non-empty error message if the next header is out of order.
    339 
    340         This function also updates the internal state to be ready to check
    341         the next include.
    342 
    343         Args:
    344           header_type: One of the _XXX_HEADER constants defined above.
    345           file_is_header: Whether the file that owns this _IncludeState is itself a header
    346 
    347         Returns:
    348           The empty string if the header is in the right order, or an
    349           error message describing what's wrong.
    350 
    351         """
    352         if header_type == _CONFIG_HEADER and file_is_header:
    353             return 'Header file should not contain WebCore config.h.'
    354         if header_type == _PRIMARY_HEADER and file_is_header:
    355             return 'Header file should not contain itself.'
    356         if header_type == _MOC_HEADER:
    357             return ''
    358 
    359         error_message = ''
    360         if self._section != self._OTHER_SECTION:
    361             before_error_message = ('Found %s before %s' %
    362                                     (self._TYPE_NAMES[header_type],
    363                                      self._SECTION_NAMES[self._section + 1]))
    364         after_error_message = ('Found %s after %s' %
    365                                 (self._TYPE_NAMES[header_type],
    366                                  self._SECTION_NAMES[self._section]))
    367 
    368         if header_type == _CONFIG_HEADER:
    369             if self._section >= self._CONFIG_SECTION:
    370                 error_message = after_error_message
    371             self._section = self._CONFIG_SECTION
    372         elif header_type == _PRIMARY_HEADER:
    373             if self._section >= self._PRIMARY_SECTION:
    374                 error_message = after_error_message
    375             elif self._section < self._CONFIG_SECTION:
    376                 error_message = before_error_message
    377             self._section = self._PRIMARY_SECTION
    378             self._visited_primary_section = True
    379         else:
    380             assert header_type == _OTHER_HEADER
    381             if not file_is_header and self._section < self._PRIMARY_SECTION:
    382                 if primary_header_exists:
    383                     error_message = before_error_message
    384             self._section = self._OTHER_SECTION
    385 
    386         return error_message
    387 
    388 
    389 class Position(object):
    390     """Holds the position of something."""
    391     def __init__(self, row, column):
    392         self.row = row
    393         self.column = column
    394 
    395     def __str__(self):
    396         return '(%s, %s)' % (self.row, self.column)
    397 
    398     def __cmp__(self, other):
    399         return self.row.__cmp__(other.row) or self.column.__cmp__(other.column)
    400 
    401 
    402 class Parameter(object):
    403     """Information about one function parameter."""
    404     def __init__(self, parameter, parameter_name_index, row):
    405         self.type = parameter[:parameter_name_index].strip()
    406         # Remove any initializers from the parameter name (e.g. int i = 5).
    407         self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip()
    408         self.row = row
    409 
    410     @memoized
    411     def lower_with_underscores_name(self):
    412         """Returns the parameter name in the lower with underscores format."""
    413         return _convert_to_lower_with_underscores(self.name)
    414 
    415 
    416 class SingleLineView(object):
    417     """Converts multiple lines into a single line (with line breaks replaced by a
    418        space) to allow for easier searching."""
    419     def __init__(self, lines, start_position, end_position):
    420         """Create a SingleLineView instance.
    421 
    422         Args:
    423           lines: a list of multiple lines to combine into a single line.
    424           start_position: offset within lines of where to start the single line.
    425           end_position: just after where to end (like a slice operation).
    426         """
    427         # Get the rows of interest.
    428         trimmed_lines = lines[start_position.row:end_position.row + 1]
    429 
    430         # Remove the columns on the last line that aren't included.
    431         trimmed_lines[-1] = trimmed_lines[-1][:end_position.column]
    432 
    433         # Remove the columns on the first line that aren't included.
    434         trimmed_lines[0] = trimmed_lines[0][start_position.column:]
    435 
    436         # Create a single line with all of the parameters.
    437         self.single_line = ' '.join(trimmed_lines)
    438 
    439         # Keep the row lengths, so we can calculate the original row number
    440         # given a column in the single line (adding 1 due to the space added
    441         # during the join).
    442         self._row_lengths = [len(line) + 1 for line in trimmed_lines]
    443         self._starting_row = start_position.row
    444 
    445     def convert_column_to_row(self, single_line_column_number):
    446         """Convert the column number from the single line into the original
    447         line number.
    448 
    449         Special cases:
    450         * Columns in the added spaces are considered part of the previous line.
    451         * Columns beyond the end of the line are consider part the last line
    452         in the view."""
    453         total_columns = 0
    454         row_offset = 0
    455         while row_offset < len(self._row_lengths) - 1 and single_line_column_number >= total_columns + self._row_lengths[row_offset]:
    456             total_columns += self._row_lengths[row_offset]
    457             row_offset += 1
    458         return self._starting_row + row_offset
    459 
    460 
    461 def create_skeleton_parameters(all_parameters):
    462     """Converts a parameter list to a skeleton version.
    463 
    464     The skeleton only has one word for the parameter name, one word for the type,
    465     and commas after each parameter and only there. Everything in the skeleton
    466     remains in the same columns as the original."""
    467     all_simplifications = (
    468         # Remove template parameters, function declaration parameters, etc.
    469         r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})',
    470         # Remove all initializers.
    471         r'=[^,]*',
    472         # Remove :: and everything before it.
    473         r'[^,]*::',
    474         # Remove modifiers like &, *.
    475         r'[&*]',
    476         # Remove const modifiers.
    477         r'\bconst\s+(?=[A-Za-z])',
    478         # Remove numerical modifiers like long.
    479         r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)')
    480 
    481     skeleton_parameters = all_parameters
    482     for simplification in all_simplifications:
    483         skeleton_parameters = iteratively_replace_matches_with_char(simplification, ' ', skeleton_parameters)
    484     # If there are any parameters, then add a , after the last one to
    485     # make a regular pattern of a , following every parameter.
    486     if skeleton_parameters.strip():
    487         skeleton_parameters += ','
    488     return skeleton_parameters
    489 
    490 
    491 def find_parameter_name_index(skeleton_parameter):
    492     """Determines where the parametere name starts given the skeleton parameter."""
    493     # The first space from the right in the simplified parameter is where the parameter
    494     # name starts unless the first space is before any content in the simplified parameter.
    495     before_name_index = skeleton_parameter.rstrip().rfind(' ')
    496     if before_name_index != -1 and skeleton_parameter[:before_name_index].strip():
    497         return before_name_index + 1
    498     return len(skeleton_parameter)
    499 
    500 
    501 def parameter_list(elided_lines, start_position, end_position):
    502     """Generator for a function's parameters."""
    503     # Create new positions that omit the outer parenthesis of the parameters.
    504     start_position = Position(row=start_position.row, column=start_position.column + 1)
    505     end_position = Position(row=end_position.row, column=end_position.column - 1)
    506     single_line_view = SingleLineView(elided_lines, start_position, end_position)
    507     skeleton_parameters = create_skeleton_parameters(single_line_view.single_line)
    508     end_index = -1
    509 
    510     while True:
    511         # Find the end of the next parameter.
    512         start_index = end_index + 1
    513         end_index = skeleton_parameters.find(',', start_index)
    514 
    515         # No comma means that all parameters have been parsed.
    516         if end_index == -1:
    517             return
    518         row = single_line_view.convert_column_to_row(end_index)
    519 
    520         # Parse the parameter into a type and parameter name.
    521         skeleton_parameter = skeleton_parameters[start_index:end_index]
    522         name_offset = find_parameter_name_index(skeleton_parameter)
    523         parameter = single_line_view.single_line[start_index:end_index]
    524         yield Parameter(parameter, name_offset, row)
    525 
    526 
    527 class _FunctionState(object):
    528     """Tracks current function name and the number of lines in its body.
    529 
    530     Attributes:
    531       min_confidence: The minimum confidence level to use while checking style.
    532 
    533     """
    534 
    535     _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    536     _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    537 
    538     def __init__(self, min_confidence):
    539         self.min_confidence = min_confidence
    540         self.current_function = ''
    541         self.in_a_function = False
    542         self.lines_in_function = 0
    543         # Make sure these will not be mistaken for real positions (even when a
    544         # small amount is added to them).
    545         self.body_start_position = Position(-1000, 0)
    546         self.end_position = Position(-1000, 0)
    547 
    548     def begin(self, function_name, function_name_start_position, body_start_position, end_position,
    549               parameter_start_position, parameter_end_position, clean_lines):
    550         """Start analyzing function body.
    551 
    552         Args:
    553             function_name: The name of the function being tracked.
    554             function_name_start_position: Position in elided where the function name starts.
    555             body_start_position: Position in elided of the { or the ; for a prototype.
    556             end_position: Position in elided just after the final } (or ; is.
    557             parameter_start_position: Position in elided of the '(' for the parameters.
    558             parameter_end_position: Position in elided just after the ')' for the parameters.
    559             clean_lines: A CleansedLines instance containing the file.
    560         """
    561         self.in_a_function = True
    562         self.lines_in_function = -1  # Don't count the open brace line.
    563         self.current_function = function_name
    564         self.function_name_start_position = function_name_start_position
    565         self.body_start_position = body_start_position
    566         self.end_position = end_position
    567         self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';'
    568         self.parameter_start_position = parameter_start_position
    569         self.parameter_end_position = parameter_end_position
    570         self.is_pure = False
    571         if self.is_declaration:
    572             characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line
    573             self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters))
    574         self._clean_lines = clean_lines
    575         self._parameter_list = None
    576 
    577     def modifiers_and_return_type(self):
    578         """Returns the modifiers and the return type."""
    579         # Go backwards from where the function name is until we encounter one of several things:
    580         #   ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0)
    581         elided = self._clean_lines.elided
    582         start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)',
    583                                           elided, self.parameter_start_position, Position(0, 0))
    584         return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip()
    585 
    586     def parameter_list(self):
    587         if not self._parameter_list:
    588             # Store the final result as a tuple since that is immutable.
    589             self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position))
    590 
    591         return self._parameter_list
    592 
    593     def count(self, line_number):
    594         """Count line in current function body."""
    595         if self.in_a_function and line_number >= self.body_start_position.row:
    596             self.lines_in_function += 1
    597 
    598     def check(self, error, line_number):
    599         """Report if too many lines in function body.
    600 
    601         Args:
    602           error: The function to call with any errors found.
    603           line_number: The number of the line to check.
    604         """
    605         if match(r'T(EST|est)', self.current_function):
    606             base_trigger = self._TEST_TRIGGER
    607         else:
    608             base_trigger = self._NORMAL_TRIGGER
    609         trigger = base_trigger * 2 ** self.min_confidence
    610 
    611         if self.lines_in_function > trigger:
    612             error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    613             # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    614             if error_level > 5:
    615                 error_level = 5
    616             error(line_number, 'readability/fn_size', error_level,
    617                   'Small and focused functions are preferred:'
    618                   ' %s has %d non-comment lines'
    619                   ' (error triggered by exceeding %d lines).'  % (
    620                       self.current_function, self.lines_in_function, trigger))
    621 
    622     def end(self):
    623         """Stop analyzing function body."""
    624         self.in_a_function = False
    625 
    626 
    627 class _IncludeError(Exception):
    628     """Indicates a problem with the include order in a file."""
    629     pass
    630 
    631 
    632 class FileInfo:
    633     """Provides utility functions for filenames.
    634 
    635     FileInfo provides easy access to the components of a file's path
    636     relative to the project root.
    637     """
    638 
    def __init__(self, filename):
        """Remembers the filename that the other methods work on.

        Args:
          filename: The path of the file; it is stored exactly as given.
        """
        self._filename = filename
    641 
    642     def full_name(self):
    643         """Make Windows paths like Unix."""
    644         return os.path.abspath(self._filename).replace('\\', '/')
    645 
    646     def repository_name(self):
    647         """Full name after removing the local path to the repository.
    648 
    649         If we have a real absolute path name here we can try to do something smart:
    650         detecting the root of the checkout and truncating /path/to/checkout from
    651         the name so that we get header guards that don't include things like
    652         "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    653         people on different computers who have checked the source out to different
    654         locations won't see bogus errors.
    655         """
    656         fullname = self.full_name()
    657 
    658         if os.path.exists(fullname):
    659             project_dir = os.path.dirname(fullname)
    660 
    661             if os.path.exists(os.path.join(project_dir, ".svn")):
    662                 # If there's a .svn file in the current directory, we
    663                 # recursively look up the directory tree for the top
    664                 # of the SVN checkout
    665                 root_dir = project_dir
    666                 one_up_dir = os.path.dirname(root_dir)
    667                 while os.path.exists(os.path.join(one_up_dir, ".svn")):
    668                     root_dir = os.path.dirname(root_dir)
    669                     one_up_dir = os.path.dirname(one_up_dir)
    670 
    671                 prefix = os.path.commonprefix([root_dir, project_dir])
    672                 return fullname[len(prefix) + 1:]
    673 
    674             # Not SVN? Try to find a git top level directory by
    675             # searching up from the current path.
    676             root_dir = os.path.dirname(fullname)
    677             while (root_dir != os.path.dirname(root_dir)
    678                    and not os.path.exists(os.path.join(root_dir, ".git"))):
    679                 root_dir = os.path.dirname(root_dir)
    680                 if os.path.exists(os.path.join(root_dir, ".git")):
    681                     prefix = os.path.commonprefix([root_dir, project_dir])
    682                     return fullname[len(prefix) + 1:]
    683 
    684         # Don't know what to do; header guard warnings may be wrong...
    685         return fullname
    686 
    687     def split(self):
    688         """Splits the file into the directory, basename, and extension.
    689 
    690         For 'chrome/browser/browser.cpp', Split() would
    691         return ('chrome/browser', 'browser', '.cpp')
    692 
    693         Returns:
    694           A tuple of (directory, basename, extension).
    695         """
    696 
    697         googlename = self.repository_name()
    698         project, rest = os.path.split(googlename)
    699         return (project,) + os.path.splitext(rest)
    700 
    701     def base_name(self):
    702         """File base name - text after the final slash, before the final period."""
    703         return self.split()[1]
    704 
    705     def extension(self):
    706         """File extension - text following the final period."""
    707         return self.split()[2]
    708 
    709     def no_extension(self):
    710         """File has no source file extension."""
    711         return '/'.join(self.split()[0:2])
    712 
    713     def is_source(self):
    714         """File has a source file extension."""
    715         return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    716 
    717 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close within one line.
# This RE is a little bit more complicated than one might expect, because we
# also have to decide which surrounding spaces to remove so that we can
# handle comments inside statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    738 
    739 
    740 def is_cpp_string(line):
    741     """Does line terminate so, that the next symbol is in string constant.
    742 
    743     This function does not consider single-line nor multi-line comments.
    744 
    745     Args:
    746       line: is a partial line of code starting from the 0..n.
    747 
    748     Returns:
    749       True, if next character appended to 'line' is inside a
    750       string constant.
    751     """
    752 
    753     line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    754     return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    755 
    756 
    757 def find_next_multi_line_comment_start(lines, line_index):
    758     """Find the beginning marker for a multiline comment."""
    759     while line_index < len(lines):
    760         if lines[line_index].strip().startswith('/*'):
    761             # Only return this marker if the comment goes beyond this line
    762             if lines[line_index].strip().find('*/', 2) < 0:
    763                 return line_index
    764         line_index += 1
    765     return len(lines)
    766 
    767 
    768 def find_next_multi_line_comment_end(lines, line_index):
    769     """We are inside a comment, find the end marker."""
    770     while line_index < len(lines):
    771         if lines[line_index].strip().endswith('*/'):
    772             return line_index
    773         line_index += 1
    774     return len(lines)
    775 
    776 
    777 def remove_multi_line_comments_from_range(lines, begin, end):
    778     """Clears a range of lines for multi-line comments."""
    779     # Having // dummy comments makes the lines non-empty, so we will not get
    780     # unnecessary blank line warnings later in the code.
    781     for i in range(begin, end):
    782         lines[i] = '// dummy'
    783 
    784 
    785 def remove_multi_line_comments(lines, error):
    786     """Removes multiline (c-style) comments from lines."""
    787     line_index = 0
    788     while line_index < len(lines):
    789         line_index_begin = find_next_multi_line_comment_start(lines, line_index)
    790         if line_index_begin >= len(lines):
    791             return
    792         line_index_end = find_next_multi_line_comment_end(lines, line_index_begin)
    793         if line_index_end >= len(lines):
    794             error(line_index_begin + 1, 'readability/multiline_comment', 5,
    795                   'Could not find end of multi-line comment')
    796             return
    797         remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1)
    798         line_index = line_index_end + 1
    799 
    800 
    801 def cleanse_comments(line):
    802     """Removes //-comments and single-line C-style /* */ comments.
    803 
    804     Args:
    805       line: A line of C++ source.
    806 
    807     Returns:
    808       The line with single-line comments removed.
    809     """
    810     comment_position = line.find('//')
    811     if comment_position != -1 and not is_cpp_string(line[:comment_position]):
    812         line = line[:comment_position]
    813     # get rid of /* ... */
    814     return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
    815 
    816 
    817 class CleansedLines(object):
    818     """Holds 3 copies of all lines with different preprocessing applied to them.
    819 
    820     1) elided member contains lines without strings and comments,
    821     2) lines member contains lines without comments, and
    822     3) raw member contains all the lines without processing.
    823     All these three members are of <type 'list'>, and of the same length.
    824     """
    825 
    826     def __init__(self, lines):
    827         self.elided = []
    828         self.lines = []
    829         self.raw_lines = lines
    830         self._num_lines = len(lines)
    831         for line_number in range(len(lines)):
    832             self.lines.append(cleanse_comments(lines[line_number]))
    833             elided = self.collapse_strings(lines[line_number])
    834             self.elided.append(cleanse_comments(elided))
    835 
    836     def num_lines(self):
    837         """Returns the number of lines represented."""
    838         return self._num_lines
    839 
    840     @staticmethod
    841     def collapse_strings(elided):
    842         """Collapses strings and chars on a line to simple "" or '' blocks.
    843 
    844         We nix strings first so we're not fooled by text like '"http://"'
    845 
    846         Args:
    847           elided: The line being processed.
    848 
    849         Returns:
    850           The line with collapsed strings.
    851         """
    852         if not _RE_PATTERN_INCLUDE.match(elided):
    853             # Remove escaped characters first to make quote/single quote collapsing
    854             # basic.  Things that look like escaped characters shouldn't occur
    855             # outside of strings and chars.
    856             elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    857             elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
    858             elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    859         return elided
    860 
    861 
    862 def close_expression(elided, position):
    863     """If input points to ( or { or [, finds the position that closes it.
    864 
    865     If elided[position.row][position.column] points to a '(' or '{' or '[',
    866     finds the line_number/pos that correspond to the closing of the expression.
    867 
    868      Args:
    869        elided: A CleansedLines.elided instance containing the file.
    870        position: The position of the opening item.
    871 
    872      Returns:
    873       The Position *past* the closing brace, or Position(len(elided), -1)
    874       if we never find a close. Note we ignore strings and comments when matching.
    875     """
    876     line = elided[position.row]
    877     start_character = line[position.column]
    878     if start_character == '(':
    879         enclosing_character_regex = r'[\(\)]'
    880     elif start_character == '[':
    881         enclosing_character_regex = r'[\[\]]'
    882     elif start_character == '{':
    883         enclosing_character_regex = r'[\{\}]'
    884     else:
    885         return Position(len(elided), -1)
    886 
    887     current_column = position.column + 1
    888     line_number = position.row
    889     net_open = 1
    890     for line in elided[position.row:]:
    891         line = line[current_column:]
    892 
    893         # Search the current line for opening and closing characters.
    894         while True:
    895             next_enclosing_character = search(enclosing_character_regex, line)
    896             # No more on this line.
    897             if not next_enclosing_character:
    898                 break
    899             current_column += next_enclosing_character.end(0)
    900             line = line[next_enclosing_character.end(0):]
    901             if next_enclosing_character.group(0) == start_character:
    902                 net_open += 1
    903             else:
    904                 net_open -= 1
    905                 if not net_open:
    906                     return Position(line_number, current_column)
    907 
    908         # Proceed to the next line.
    909         line_number += 1
    910         current_column = 0
    911 
    912     # The given item was not closed.
    913     return Position(len(elided), -1)
    914 
    915 def check_for_copyright(lines, error):
    916     """Logs an error if no Copyright message appears at the top of the file."""
    917 
    918     # We'll say it should occur by line 10. Don't forget there's a
    919     # dummy line at the front.
    920     for line in xrange(1, min(len(lines), 11)):
    921         if re.search(r'Copyright', lines[line], re.I):
    922             break
    923     else:                       # means no copyright line was found
    924         error(0, 'legal/copyright', 5,
    925               'No copyright message found.  '
    926               'You should have a line: "Copyright [year] <Copyright Owner>"')
    927 
    928 
    929 def get_header_guard_cpp_variable(filename):
    930     """Returns the CPP variable that should be used as a header guard.
    931 
    932     Args:
    933       filename: The name of a C++ header file.
    934 
    935     Returns:
    936       The CPP variable that should be used as a header guard in the
    937       named file.
    938 
    939     """
    940 
    941     # Restores original filename in case that style checker is invoked from Emacs's
    942     # flymake.
    943     filename = re.sub(r'_flymake\.h$', '.h', filename)
    944 
    945     standard_name = sub(r'[-.\s]', '_', os.path.basename(filename))
    946 
    947     # Files under WTF typically have header guards that start with WTF_.
    948     if '/wtf/' in filename:
    949         special_name = "WTF_" + standard_name
    950     else:
    951         special_name = standard_name
    952     return (special_name, standard_name)
    953 
    954 
    955 def check_for_header_guard(filename, lines, error):
    956     """Checks that the file contains a header guard.
    957 
    958     Logs an error if no #ifndef header guard is present.  For other
    959     headers, checks that the full pathname is used.
    960 
    961     Args:
    962       filename: The name of the C++ header file.
    963       lines: An array of strings, each representing a line of the file.
    964       error: The function to call with any errors found.
    965     """
    966 
    967     cppvar = get_header_guard_cpp_variable(filename)
    968 
    969     ifndef = None
    970     ifndef_line_number = 0
    971     define = None
    972     for line_number, line in enumerate(lines):
    973         line_split = line.split()
    974         if len(line_split) >= 2:
    975             # find the first occurrence of #ifndef and #define, save arg
    976             if not ifndef and line_split[0] == '#ifndef':
    977                 # set ifndef to the header guard presented on the #ifndef line.
    978                 ifndef = line_split[1]
    979                 ifndef_line_number = line_number
    980             if not define and line_split[0] == '#define':
    981                 define = line_split[1]
    982             if define and ifndef:
    983                 break
    984 
    985     if not ifndef or not define or ifndef != define:
    986         error(0, 'build/header_guard', 5,
    987               'No #ifndef header guard found, suggested CPP variable is: %s' %
    988               cppvar[0])
    989         return
    990 
    991     # The guard should be File_h.
    992     if ifndef not in cppvar:
    993         error(ifndef_line_number, 'build/header_guard', 5,
    994               '#ifndef header guard has wrong style, please use: %s' % cppvar[0])
    995 
    996 
    997 def check_for_unicode_replacement_characters(lines, error):
    998     """Logs an error for each line containing Unicode replacement characters.
    999 
   1000     These indicate that either the file contained invalid UTF-8 (likely)
   1001     or Unicode replacement characters (which it shouldn't).  Note that
   1002     it's possible for this to throw off line numbering if the invalid
   1003     UTF-8 occurred adjacent to a newline.
   1004 
   1005     Args:
   1006       lines: An array of strings, each representing a line of the file.
   1007       error: The function to call with any errors found.
   1008     """
   1009     for line_number, line in enumerate(lines):
   1010         if u'\ufffd' in line:
   1011             error(line_number, 'readability/utf8', 5,
   1012                   'Line contains invalid UTF-8 (or Unicode replacement character).')
   1013 
   1014 
   1015 def check_for_new_line_at_eof(lines, error):
   1016     """Logs an error if there is no newline char at the end of the file.
   1017 
   1018     Args:
   1019       lines: An array of strings, each representing a line of the file.
   1020       error: The function to call with any errors found.
   1021     """
   1022 
   1023     # The array lines() was created by adding two newlines to the
   1024     # original file (go figure), then splitting on \n.
   1025     # To verify that the file ends in \n, we just have to make sure the
   1026     # last-but-two element of lines() exists and is empty.
   1027     if len(lines) < 3 or lines[-2]:
   1028         error(len(lines) - 2, 'whitespace/ending_newline', 5,
   1029               'Could not find a newline character at the end of the file.')
   1030 
   1031 
   1032 def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
   1033     """Logs an error if we see /* ... */ or "..." that extend past one line.
   1034 
   1035     /* ... */ comments are legit inside macros, for one line.
   1036     Otherwise, we prefer // comments, so it's ok to warn about the
   1037     other.  Likewise, it's ok for strings to extend across multiple
   1038     lines, as long as a line continuation character (backslash)
   1039     terminates each line. Although not currently prohibited by the C++
   1040     style guide, it's ugly and unnecessary. We don't do well with either
   1041     in this lint program, so we warn about both.
   1042 
   1043     Args:
   1044       clean_lines: A CleansedLines instance containing the file.
   1045       line_number: The number of the line to check.
   1046       error: The function to call with any errors found.
   1047     """
   1048     line = clean_lines.elided[line_number]
   1049 
   1050     # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1051     # second (escaped) slash may trigger later \" detection erroneously.
   1052     line = line.replace('\\\\', '')
   1053 
   1054     if line.count('/*') > line.count('*/'):
   1055         error(line_number, 'readability/multiline_comment', 5,
   1056               'Complex multi-line /*...*/-style comment found. '
   1057               'Lint may give bogus warnings.  '
   1058               'Consider replacing these with //-style comments, '
   1059               'with #if 0...#endif, '
   1060               'or with more clearly structured multi-line comments.')
   1061 
   1062     if (line.count('"') - line.count('\\"')) % 2:
   1063         error(line_number, 'readability/multiline_string', 5,
   1064               'Multi-line string ("...") found.  This lint script doesn\'t '
   1065               'do well with such strings, and may give bogus warnings.  They\'re '
   1066               'ugly and unnecessary, and you should use concatenation instead".')
   1067 
   1068 
   1069 _THREADING_LIST = (
   1070     ('asctime(', 'asctime_r('),
   1071     ('ctime(', 'ctime_r('),
   1072     ('getgrgid(', 'getgrgid_r('),
   1073     ('getgrnam(', 'getgrnam_r('),
   1074     ('getlogin(', 'getlogin_r('),
   1075     ('getpwnam(', 'getpwnam_r('),
   1076     ('getpwuid(', 'getpwuid_r('),
   1077     ('gmtime(', 'gmtime_r('),
   1078     ('localtime(', 'localtime_r('),
   1079     ('rand(', 'rand_r('),
   1080     ('readdir(', 'readdir_r('),
   1081     ('strtok(', 'strtok_r('),
   1082     ('ttyname(', 'ttyname_r('),
   1083     )
   1084 
   1085 
   1086 def check_posix_threading(clean_lines, line_number, error):
   1087     """Checks for calls to thread-unsafe functions.
   1088 
   1089     Much code has been originally written without consideration of
   1090     multi-threading. Also, engineers are relying on their old experience;
   1091     they have learned posix before threading extensions were added. These
   1092     tests guide the engineers to use thread-safe functions (when using
   1093     posix directly).
   1094 
   1095     Args:
   1096       clean_lines: A CleansedLines instance containing the file.
   1097       line_number: The number of the line to check.
   1098       error: The function to call with any errors found.
   1099     """
   1100     line = clean_lines.elided[line_number]
   1101     for single_thread_function, multithread_safe_function in _THREADING_LIST:
   1102         index = line.find(single_thread_function)
   1103         # Comparisons made explicit for clarity
   1104         if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
   1105                                           and line[index - 1] not in ('_', '.', '>'))):
   1106             error(line_number, 'runtime/threadsafe_fn', 2,
   1107                   'Consider using ' + multithread_safe_function +
   1108                   '...) instead of ' + single_thread_function +
   1109                   '...) for improved thread safety.')
   1110 
   1111 
   1112 # Matches invalid increment: *count++, which moves pointer instead of
   1113 # incrementing a value.
   1114 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   1115     r'^\s*\*\w+(\+\+|--);')
   1116 
   1117 
   1118 def check_invalid_increment(clean_lines, line_number, error):
   1119     """Checks for invalid increment *count++.
   1120 
   1121     For example following function:
   1122     void increment_counter(int* count) {
   1123         *count++;
   1124     }
   1125     is invalid, because it effectively does count++, moving pointer, and should
   1126     be replaced with ++*count, (*count)++ or *count += 1.
   1127 
   1128     Args:
   1129       clean_lines: A CleansedLines instance containing the file.
   1130       line_number: The number of the line to check.
   1131       error: The function to call with any errors found.
   1132     """
   1133     line = clean_lines.elided[line_number]
   1134     if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1135         error(line_number, 'runtime/invalid_increment', 5,
   1136               'Changing pointer instead of value (or unused value of operator*).')
   1137 
   1138 
   1139 class _ClassInfo(object):
   1140     """Stores information about a class."""
   1141 
   1142     def __init__(self, name, line_number):
   1143         self.name = name
   1144         self.line_number = line_number
   1145         self.seen_open_brace = False
   1146         self.is_derived = False
   1147         self.virtual_method_line_number = None
   1148         self.has_virtual_destructor = False
   1149         self.brace_depth = 0
   1150         self.unsigned_bitfields = []
   1151         self.bool_bitfields = []
   1152 
   1153 
   1154 class _ClassState(object):
   1155     """Holds the current state of the parse relating to class declarations.
   1156 
   1157     It maintains a stack of _ClassInfos representing the parser's guess
   1158     as to the current nesting of class declarations. The innermost class
   1159     is at the top (back) of the stack. Typically, the stack will either
   1160     be empty or have exactly one entry.
   1161     """
   1162 
   1163     def __init__(self):
   1164         self.classinfo_stack = []
   1165 
   1166     def check_finished(self, error):
   1167         """Checks that all classes have been completely parsed.
   1168 
   1169         Call this when all lines in a file have been processed.
   1170         Args:
   1171           error: The function to call with any errors found.
   1172         """
   1173         if self.classinfo_stack:
   1174             # Note: This test can result in false positives if #ifdef constructs
   1175             # get in the way of brace matching. See the testBuildClass test in
   1176             # cpp_style_unittest.py for an example of this.
   1177             error(self.classinfo_stack[0].line_number, 'build/class', 5,
   1178                   'Failed to find complete declaration of class %s' %
   1179                   self.classinfo_stack[0].name)
   1180 
   1181 
   1182 class _FileState(object):
   1183     def __init__(self, clean_lines, file_extension):
   1184         self._did_inside_namespace_indent_warning = False
   1185         self._clean_lines = clean_lines
   1186         if file_extension in ['m', 'mm']:
   1187             self._is_objective_c = True
   1188             self._is_c = False
   1189         elif file_extension == 'h':
   1190             # In the case of header files, it is unknown if the file
   1191             # is c / objective c or not, so set this value to None and then
   1192             # if it is requested, use heuristics to guess the value.
   1193             self._is_objective_c = None
   1194             self._is_c = None
   1195         elif file_extension == 'c':
   1196             self._is_c = True
   1197             self._is_objective_c = False
   1198         else:
   1199             self._is_objective_c = False
   1200             self._is_c = False
   1201 
   1202     def set_did_inside_namespace_indent_warning(self):
   1203         self._did_inside_namespace_indent_warning = True
   1204 
   1205     def did_inside_namespace_indent_warning(self):
   1206         return self._did_inside_namespace_indent_warning
   1207 
   1208     def is_objective_c(self):
   1209         if self._is_objective_c is None:
   1210             for line in self._clean_lines.elided:
   1211                 # Starting with @ or #import seem like the best indications
   1212                 # that we have an Objective C file.
   1213                 if line.startswith("@") or line.startswith("#import"):
   1214                     self._is_objective_c = True
   1215                     break
   1216             else:
   1217                 self._is_objective_c = False
   1218         return self._is_objective_c
   1219 
   1220     def is_c(self):
   1221         if self._is_c is None:
   1222             for line in self._clean_lines.lines:
   1223                 # if extern "C" is found, then it is a good indication
   1224                 # that we have a C header file.
   1225                 if line.startswith('extern "C"'):
   1226                     self._is_c = True
   1227                     break
   1228             else:
   1229                 self._is_c = False
   1230         return self._is_c
   1231 
   1232     def is_c_or_objective_c(self):
   1233         """Return whether the file extension corresponds to C or Objective-C."""
   1234         return self.is_c() or self.is_objective_c()
   1235 
   1236 
   1237 class _EnumState(object):
   1238     """Maintains whether currently in an enum declaration, and checks whether
   1239     enum declarations follow the style guide.
   1240     """
   1241 
   1242     def __init__(self):
   1243         self.in_enum_decl = False
   1244         self.is_webidl_enum = False
   1245 
   1246     def process_clean_line(self, line):
   1247         # FIXME: The regular expressions for expr_all_uppercase and expr_enum_end only accept integers
   1248         # and identifiers for the value of the enumerator, but do not accept any other constant
   1249         # expressions. However, this is sufficient for now (11/27/2012).
   1250         expr_all_uppercase = r'\s*[A-Z0-9_]+\s*(?:=\s*[a-zA-Z0-9]+\s*)?,?\s*$'
   1251         expr_starts_lowercase = r'\s*[a-z]'
   1252         expr_enum_end = r'}\s*(?:[a-zA-Z0-9]+\s*(?:=\s*[a-zA-Z0-9]+)?)?\s*;\s*'
   1253         expr_enum_start = r'\s*enum(?:\s+[a-zA-Z0-9]+)?\s*\{?\s*'
   1254         if self.in_enum_decl:
   1255             if match(r'\s*' + expr_enum_end + r'$', line):
   1256                 self.in_enum_decl = False
   1257                 self.is_webidl_enum = False
   1258             elif match(expr_all_uppercase, line):
   1259                 return self.is_webidl_enum
   1260             elif match(expr_starts_lowercase, line):
   1261                 return False
   1262         else:
   1263             matched = match(expr_enum_start + r'$', line)
   1264             if matched:
   1265                 self.in_enum_decl = True
   1266             else:
   1267                 matched = match(expr_enum_start + r'(?P<members>.*)' + expr_enum_end + r'$', line)
   1268                 if matched:
   1269                     members = matched.group('members').split(',')
   1270                     found_invalid_member = False
   1271                     for member in members:
   1272                         if match(expr_all_uppercase, member):
   1273                             found_invalid_member = not self.is_webidl_enum
   1274                         if match(expr_starts_lowercase, member):
   1275                             found_invalid_member = True
   1276                         if found_invalid_member:
   1277                             self.is_webidl_enum = False
   1278                             return False
   1279                     return True
   1280         return True
   1281 
   1282 def check_for_non_standard_constructs(clean_lines, line_number,
   1283                                       class_state, error):
   1284     """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   1285 
   1286     Complain about several constructs which gcc-2 accepts, but which are
   1287     not standard C++.  Warning about these in lint is one way to ease the
   1288     transition to new compilers.
   1289     - put storage class first (e.g. "static const" instead of "const static").
   1290     - "%lld" instead of %qd" in printf-type functions.
   1291     - "%1$d" is non-standard in printf-type functions.
   1292     - "\%" is an undefined character escape sequence.
   1293     - text after #endif is not allowed.
   1294     - invalid inner-style forward declaration.
   1295     - >? and <? operators, and their >?= and <?= cousins.
   1296     - classes with virtual methods need virtual destructors (compiler warning
   1297         available, but not turned on yet.)
   1298 
   1299     Additionally, check for constructor/destructor style violations as it
   1300     is very convenient to do so while checking for gcc-2 compliance.
   1301 
   1302     Args:
   1303       clean_lines: A CleansedLines instance containing the file.
   1304       line_number: The number of the line to check.
   1305       class_state: A _ClassState instance which maintains information about
   1306                    the current stack of nested class declarations being parsed.
   1307       error: A callable to which errors are reported, which takes parameters:
   1308              line number, error level, and message
   1309     """
   1310 
   1311     # Remove comments from the line, but leave in strings for now.
   1312     line = clean_lines.lines[line_number]
   1313 
   1314     if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   1315         error(line_number, 'runtime/printf_format', 3,
   1316               '%q in format strings is deprecated.  Use %ll instead.')
   1317 
   1318     if search(r'printf\s*\(.*".*%\d+\$', line):
   1319         error(line_number, 'runtime/printf_format', 2,
   1320               '%N$ formats are unconventional.  Try rewriting to avoid them.')
   1321 
   1322     # Remove escaped backslashes before looking for undefined escapes.
   1323     line = line.replace('\\\\', '')
   1324 
   1325     if search(r'("|\').*\\(%|\[|\(|{)', line):
   1326         error(line_number, 'build/printf_format', 3,
   1327               '%, [, (, and { are undefined character escapes.  Unescape them.')
   1328 
   1329     # For the rest, work with both comments and strings removed.
   1330     line = clean_lines.elided[line_number]
   1331 
   1332     if search(r'\b(const|volatile|void|char|short|int|long'
   1333               r'|float|double|signed|unsigned'
   1334               r'|schar|u?int8|u?int16|u?int32|u?int64)'
   1335               r'\s+(auto|register|static|extern|typedef)\b',
   1336               line):
   1337         error(line_number, 'build/storage_class', 5,
   1338               'Storage class (static, extern, typedef, etc) should be first.')
   1339 
   1340     if match(r'\s*#\s*endif\s*[^/\s]+', line):
   1341         error(line_number, 'build/endif_comment', 5,
   1342               'Uncommented text after #endif is non-standard.  Use a comment.')
   1343 
   1344     if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   1345         error(line_number, 'build/forward_decl', 5,
   1346               'Inner-style forward declarations are invalid.  Remove this line.')
   1347 
   1348     if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
   1349         error(line_number, 'build/deprecated', 3,
   1350               '>? and <? (max and min) operators are non-standard and deprecated.')
   1351 
   1352     # Track class entry and exit, and attempt to find cases within the
   1353     # class declaration that don't meet the C++ style
   1354     # guidelines. Tracking is very dependent on the code matching Google
   1355     # style guidelines, but it seems to perform well enough in testing
   1356     # to be a worthwhile addition to the checks.
   1357     classinfo_stack = class_state.classinfo_stack
   1358     # Look for a class declaration
   1359     class_decl_match = match(
   1360         r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
   1361     if class_decl_match:
   1362         classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))
   1363 
   1364     # Everything else in this function uses the top of the stack if it's
   1365     # not empty.
   1366     if not classinfo_stack:
   1367         return
   1368 
   1369     classinfo = classinfo_stack[-1]
   1370 
   1371     # If the opening brace hasn't been seen look for it and also
   1372     # parent class declarations.
   1373     if not classinfo.seen_open_brace:
   1374         # If the line has a ';' in it, assume it's a forward declaration or
   1375         # a single-line class declaration, which we won't process.
   1376         if line.find(';') != -1:
   1377             classinfo_stack.pop()
   1378             return
   1379         classinfo.seen_open_brace = (line.find('{') != -1)
   1380         # Look for a bare ':'
   1381         if search('(^|[^:]):($|[^:])', line):
   1382             classinfo.is_derived = True
   1383         if not classinfo.seen_open_brace:
   1384             return  # Everything else in this function is for after open brace
   1385 
   1386     # The class may have been declared with namespace or classname qualifiers.
   1387     # The constructor and destructor will not have those qualifiers.
   1388     base_classname = classinfo.name.split('::')[-1]
   1389 
   1390     # Look for single-argument constructors that aren't marked explicit.
   1391     # Technically a valid construct, but against style.
   1392     args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
   1393                  % re.escape(base_classname),
   1394                  line)
   1395     if (args
   1396         and args.group(1) != 'void'
   1397         and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
   1398                       args.group(1).strip())):
   1399         error(line_number, 'runtime/explicit', 5,
   1400               'Single-argument constructors should be marked explicit.')
   1401 
   1402     # Look for methods declared virtual.
   1403     if search(r'\bvirtual\b', line):
   1404         classinfo.virtual_method_line_number = line_number
   1405         # Only look for a destructor declaration on the same line. It would
   1406         # be extremely unlikely for the destructor declaration to occupy
   1407         # more than one line.
   1408         if search(r'~%s\s*\(' % base_classname, line):
   1409             classinfo.has_virtual_destructor = True
   1410 
   1411     # Look for class end.
   1412     brace_depth = classinfo.brace_depth
   1413     brace_depth = brace_depth + line.count('{') - line.count('}')
   1414     if brace_depth <= 0:
   1415         classinfo = classinfo_stack.pop()
   1416         # Try to detect missing virtual destructor declarations.
   1417         # For now, only warn if a non-derived class with virtual methods lacks
   1418         # a virtual destructor. This is to make it less likely that people will
   1419         # declare derived virtual destructors without declaring the base
   1420         # destructor virtual.
   1421         if ((classinfo.virtual_method_line_number is not None)
   1422             and (not classinfo.has_virtual_destructor)
   1423             and (not classinfo.is_derived)):  # Only warn for base classes
   1424             error(classinfo.line_number, 'runtime/virtual', 4,
   1425                   'The class %s probably needs a virtual destructor due to '
   1426                   'having virtual method(s), one declared at line %d.'
   1427                   % (classinfo.name, classinfo.virtual_method_line_number))
   1428         # Look for mixed bool and unsigned bitfields.
   1429         if (classinfo.bool_bitfields and classinfo.unsigned_bitfields):
   1430             bool_list = ', '.join(classinfo.bool_bitfields)
   1431             unsigned_list = ', '.join(classinfo.unsigned_bitfields)
   1432             error(classinfo.line_number, 'runtime/bitfields', 5,
   1433                   'The class %s contains mixed unsigned and bool bitfields, '
   1434                   'which will pack into separate words on the MSVC compiler.\n'
   1435                   'Bool bitfields are [%s].\nUnsigned bitfields are [%s].\n'
   1436                   'Consider converting bool bitfields to unsigned.'
   1437                   % (classinfo.name, bool_list, unsigned_list))
   1438     else:
   1439         classinfo.brace_depth = brace_depth
   1440 
   1441     well_typed_bitfield = False;
   1442     # Look for bool <name> : 1 declarations.
   1443     args = search(r'\bbool\s+(\S*)\s*:\s*\d+\s*;', line)
   1444     if args:
   1445         classinfo.bool_bitfields.append('%d: %s' % (line_number, args.group(1)))
   1446         well_typed_bitfield = True;
   1447 
   1448     # Look for unsigned <name> : n declarations.
   1449     args = search(r'\bunsigned\s+(?:int\s+)?(\S+)\s*:\s*\d+\s*;', line)
   1450     if args:
   1451         classinfo.unsigned_bitfields.append('%d: %s' % (line_number, args.group(1)))
   1452         well_typed_bitfield = True;
   1453 
   1454     # Look for other bitfield declarations. We don't care about those in
   1455     # size-matching structs.
   1456     if not (well_typed_bitfield or classinfo.name.startswith('SameSizeAs') or
   1457             classinfo.name.startswith('Expected')):
   1458         args = match(r'\s*(\S+)\s+(\S+)\s*:\s*\d+\s*;', line)
   1459         if args:
   1460             error(line_number, 'runtime/bitfields', 4,
   1461                   'Member %s of class %s defined as a bitfield of type %s. '
   1462                   'Please declare all bitfields as unsigned.'
   1463                   % (args.group(2), classinfo.name, args.group(1)))
   1464 
   1465 def check_spacing_for_function_call(line, line_number, error):
   1466     """Checks for the correctness of various spacing around function calls.
   1467 
   1468     Args:
   1469       line: The text of the line to check.
   1470       line_number: The number of the line to check.
   1471       error: The function to call with any errors found.
   1472     """
   1473 
   1474     # Since function calls often occur inside if/for/foreach/while/switch
   1475     # expressions - which have their own, more liberal conventions - we
   1476     # first see if we should be looking inside such an expression for a
   1477     # function call, to which we can apply more strict standards.
   1478     function_call = line    # if there's no control flow construct, look at whole line
   1479     for pattern in (r'\bif\s*\((.*)\)\s*{',
   1480                     r'\bfor\s*\((.*)\)\s*{',
   1481                     r'\bforeach\s*\((.*)\)\s*{',
   1482                     r'\bwhile\s*\((.*)\)\s*[{;]',
   1483                     r'\bswitch\s*\((.*)\)\s*{'):
   1484         matched = search(pattern, line)
   1485         if matched:
   1486             function_call = matched.group(1)    # look inside the parens for function calls
   1487             break
   1488 
   1489     # Except in if/for/foreach/while/switch, there should never be space
   1490     # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1491     # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1492     # a space before a ( when it's a function argument.  I assume it's a
   1493     # function argument when the char before the whitespace is legal in
   1494     # a function name (alnum + _) and we're not starting a macro. Also ignore
   1495     # pointers and references to arrays and functions coz they're too tricky:
   1496     # we use a very simple way to recognize these:
   1497     # " (something)(maybe-something)" or
   1498     # " (something)(maybe-something," or
   1499     # " (something)[something]"
   1500     # Note that we assume the contents of [] to be short enough that
   1501     # they'll never need to wrap.
   1502     if (  # Ignore control structures.
   1503         not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
   1504         # Ignore pointers/references to functions.
   1505         and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
   1506         # Ignore pointers/references to arrays.
   1507         and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
   1508         if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):      # a ( used for a fn call
   1509             error(line_number, 'whitespace/parens', 4,
   1510                   'Extra space after ( in function call')
   1511         elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
   1512             error(line_number, 'whitespace/parens', 2,
   1513                   'Extra space after (')
   1514         if (search(r'\w\s+\(', function_call)
   1515             and not match(r'\s*(#|typedef)', function_call)):
   1516             error(line_number, 'whitespace/parens', 4,
   1517                   'Extra space before ( in function call')
   1518         # If the ) is followed only by a newline or a { + newline, assume it's
   1519         # part of a control statement (if/while/etc), and don't complain
   1520         if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
   1521             error(line_number, 'whitespace/parens', 2,
   1522                   'Extra space before )')
   1523 
   1524 
   1525 def is_blank_line(line):
   1526     """Returns true if the given line is blank.
   1527 
   1528     We consider a line to be blank if the line is empty or consists of
   1529     only white spaces.
   1530 
   1531     Args:
   1532       line: A line of a string.
   1533 
   1534     Returns:
   1535       True, if the given line is blank.
   1536     """
   1537     return not line or line.isspace()
   1538 
   1539 
   1540 def detect_functions(clean_lines, line_number, function_state, error):
   1541     """Finds where functions start and end.
   1542 
   1543     Uses a simplistic algorithm assuming other style guidelines
   1544     (especially spacing) are followed.
   1545     Trivial bodies are unchecked, so constructors with huge initializer lists
   1546     may be missed.
   1547 
   1548     Args:
   1549       clean_lines: A CleansedLines instance containing the file.
   1550       line_number: The number of the line to check.
   1551       function_state: Current function name and lines in body so far.
   1552       error: The function to call with any errors found.
   1553     """
   1554     # Are we now past the end of a function?
   1555     if function_state.end_position.row + 1 == line_number:
   1556         function_state.end()
   1557 
   1558     # If we're in a function, don't try to detect a new one.
   1559     if function_state.in_a_function:
   1560         return
   1561 
   1562     lines = clean_lines.lines
   1563     line = lines[line_number]
   1564     raw = clean_lines.raw_lines
   1565     raw_line = raw[line_number]
   1566 
   1567     # Lines ending with a \ indicate a macro. Don't try to check them.
   1568     if raw_line.endswith('\\'):
   1569         return
   1570 
   1571     regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\('  # decls * & space::name( ...
   1572     match_result = match(regexp, line)
   1573     if not match_result:
   1574         return
   1575 
   1576     # If the name is all caps and underscores, figure it's a macro and
   1577     # ignore it, unless it's TEST or TEST_F.
   1578     function_name = match_result.group(1).split()[-1]
   1579     if function_name != 'TEST' and function_name != 'TEST_F' and match(r'[A-Z_]+$', function_name):
   1580         return
   1581 
   1582     joined_line = ''
   1583     for start_line_number in xrange(line_number, clean_lines.num_lines()):
   1584         start_line = clean_lines.elided[start_line_number]
   1585         joined_line += ' ' + start_line.lstrip()
   1586         body_match = search(r'{|;', start_line)
   1587         if body_match:
   1588             body_start_position = Position(start_line_number, body_match.start(0))
   1589 
   1590             # Replace template constructs with _ so that no spaces remain in the function name,
   1591             # while keeping the column numbers of other characters the same as "line".
   1592             line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line)
   1593             match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates)
   1594             if not match_function:
   1595                 return  # The '(' must have been inside of a template.
   1596 
   1597             # Use the column numbers from the modified line to find the
   1598             # function name in the original line.
   1599             function = line[match_function.start(1):match_function.end(1)]
   1600             function_name_start_position = Position(line_number, match_function.start(1))
   1601 
   1602             if match(r'TEST', function):    # Handle TEST... macros
   1603                 parameter_regexp = search(r'(\(.*\))', joined_line)
   1604                 if parameter_regexp:             # Ignore bad syntax
   1605                     function += parameter_regexp.group(1)
   1606             else:
   1607                 function += '()'
   1608 
   1609             parameter_start_position = Position(line_number, match_function.end(1))
   1610             parameter_end_position = close_expression(clean_lines.elided, parameter_start_position)
   1611             if parameter_end_position.row == len(clean_lines.elided):
   1612                 # No end was found.
   1613                 return
   1614 
   1615             if start_line[body_start_position.column] == ';':
   1616                 end_position = Position(body_start_position.row, body_start_position.column + 1)
   1617             else:
   1618                 end_position = close_expression(clean_lines.elided, body_start_position)
   1619 
   1620             # Check for nonsensical positions. (This happens in test cases which check code snippets.)
   1621             if parameter_end_position > body_start_position:
   1622                 return
   1623 
   1624             function_state.begin(function, function_name_start_position, body_start_position, end_position,
   1625                                  parameter_start_position, parameter_end_position, clean_lines)
   1626             return
   1627 
   1628     # No body for the function (or evidence of a non-function) was found.
   1629     error(line_number, 'readability/fn_size', 5,
   1630           'Lint failed to find start of function body.')
   1631 
   1632 
   1633 def check_for_function_lengths(clean_lines, line_number, function_state, error):
   1634     """Reports for long function bodies.
   1635 
   1636     For an overview why this is done, see:
   1637     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1638 
   1639     Blank/comment lines are not counted so as to avoid encouraging the removal
   1640     of vertical space and commments just to get through a lint check.
   1641     NOLINT *on the last line of a function* disables this check.
   1642 
   1643     Args:
   1644       clean_lines: A CleansedLines instance containing the file.
   1645       line_number: The number of the line to check.
   1646       function_state: Current function name and lines in body so far.
   1647       error: The function to call with any errors found.
   1648     """
   1649     lines = clean_lines.lines
   1650     line = lines[line_number]
   1651     raw = clean_lines.raw_lines
   1652     raw_line = raw[line_number]
   1653 
   1654     if function_state.end_position.row == line_number:  # last line
   1655         if not search(r'\bNOLINT\b', raw_line):
   1656             function_state.check(error, line_number)
   1657     elif not match(r'^\s*$', line):
   1658         function_state.count(line_number)  # Count non-blank/non-comment lines.
   1659 
   1660 
   1661 def _check_parameter_name_against_text(parameter, text, error):
   1662     """Checks to see if the parameter name is contained within the text.
   1663 
   1664     Return false if the check failed (i.e. an error was produced).
   1665     """
   1666 
   1667     # Treat 'lower with underscores' as a canonical form because it is
   1668     # case insensitive while still retaining word breaks. (This ensures that
   1669     # 'elate' doesn't look like it is duplicating of 'NateLate'.)
   1670     canonical_parameter_name = parameter.lower_with_underscores_name()
   1671 
   1672     # Appends "object" to all text to catch variables that did the same (but only
   1673     # do this when the parameter name is more than a single character to avoid
   1674     # flagging 'b' which may be an ok variable when used in an rgba function).
   1675     if len(canonical_parameter_name) > 1:
   1676         text = sub(r'(\w)\b', r'\1Object', text)
   1677     canonical_text = _convert_to_lower_with_underscores(text)
   1678 
   1679     # Used to detect cases like ec for ExceptionCode.
   1680     acronym = _create_acronym(text).lower()
   1681     if canonical_text.find(canonical_parameter_name) != -1 or acronym.find(canonical_parameter_name) != -1:
   1682         error(parameter.row, 'readability/parameter_name', 5,
   1683               'The parameter name "%s" adds no information, so it should be removed.' % parameter.name)
   1684         return False
   1685     return True
   1686 
   1687 
   1688 def check_function_definition_and_pass_ptr(type_text, row, location_description, error):
   1689     """Check that function definitions for use Pass*Ptr instead of *Ptr.
   1690 
   1691     Args:
   1692        type_text: A string containing the type. (For return values, it may contain more than the type.)
   1693        row: The row number of the type.
   1694        location_description: Used to indicate where the type is. This is either 'parameter' or 'return'.
   1695        error: The function to call with any errors found.
   1696     """
   1697     match_ref_or_own_ptr = '(?=\W|^)(Ref|Own)Ptr(?=\W)'
   1698     exceptions = '(?:&|\*|\*\s*=\s*0)$'
   1699     bad_type_usage = search(match_ref_or_own_ptr, type_text)
   1700     exception_usage = search(exceptions, type_text)
   1701     if not bad_type_usage or exception_usage:
   1702         return
   1703     type_name = bad_type_usage.group(0)
   1704     error(row, 'readability/pass_ptr', 5,
   1705           'The %s type should use Pass%s instead of %s.' % (location_description, type_name, type_name))
   1706 
   1707 
   1708 def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error):
   1709     """Check that function definitions for style issues.
   1710 
   1711     Specifically, check that parameter names in declarations add information.
   1712 
   1713     Args:
   1714        filename: Filename of the file that is being processed.
   1715        file_extension: The current file extension, without the leading dot.
   1716        clean_lines: A CleansedLines instance containing the file.
   1717        line_number: The number of the line to check.
   1718        function_state: Current function name and lines in body so far.
   1719        error: The function to call with any errors found.
   1720     """
   1721     if line_number != function_state.body_start_position.row:
   1722         return
   1723 
   1724     modifiers_and_return_type = function_state.modifiers_and_return_type()
   1725     if filename.find('/chromium/') != -1 and search(r'\bWEBKIT_EXPORT\b', modifiers_and_return_type):
   1726         if filename.find('/chromium/public/') == -1 and filename.find('/chromium/tests/') == -1 and filename.find('chromium/platform') == -1:
   1727             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1728                   'WEBKIT_EXPORT should only appear in the chromium public (or tests) directory.')
   1729         elif not file_extension == "h":
   1730             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1731                   'WEBKIT_EXPORT should only be used in header files.')
   1732         elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type):
   1733             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1734                   'WEBKIT_EXPORT should not be used on a function with a body.')
   1735         elif function_state.is_pure:
   1736             error(function_state.function_name_start_position.row, 'readability/webkit_export', 5,
   1737                   'WEBKIT_EXPORT should not be used with a pure virtual function.')
   1738 
   1739     check_function_definition_and_pass_ptr(modifiers_and_return_type, function_state.function_name_start_position.row, 'return', error)
   1740 
   1741     parameter_list = function_state.parameter_list()
   1742     for parameter in parameter_list:
   1743         check_function_definition_and_pass_ptr(parameter.type, parameter.row, 'parameter', error)
   1744 
   1745         # Do checks specific to function declarations and parameter names.
   1746         if not function_state.is_declaration or not parameter.name:
   1747             continue
   1748 
   1749         # Check the parameter name against the function name for single parameter set functions.
   1750         if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function):
   1751             trimmed_function_name = function_state.current_function[len('set'):]
   1752             if not _check_parameter_name_against_text(parameter, trimmed_function_name, error):
   1753                 continue  # Since an error was noted for this name, move to the next parameter.
   1754 
   1755         # Check the parameter name against the type.
   1756         if not _check_parameter_name_against_text(parameter, parameter.type, error):
   1757             continue  # Since an error was noted for this name, move to the next parameter.
   1758 
   1759 
   1760 def check_pass_ptr_usage(clean_lines, line_number, function_state, error):
   1761     """Check for proper usage of Pass*Ptr.
   1762 
   1763     Currently this is limited to detecting declarations of Pass*Ptr
   1764     variables inside of functions.
   1765 
   1766     Args:
   1767       clean_lines: A CleansedLines instance containing the file.
   1768       line_number: The number of the line to check.
   1769       function_state: Current function name and lines in body so far.
   1770       error: The function to call with any errors found.
   1771     """
   1772     if not function_state.in_a_function:
   1773         return
   1774 
   1775     lines = clean_lines.lines
   1776     line = lines[line_number]
   1777     if line_number > function_state.body_start_position.row:
   1778         matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line)
   1779         if matched_pass_ptr:
   1780             type_name = 'Pass%sPtr' % matched_pass_ptr.group(1)
   1781             error(line_number, 'readability/pass_ptr', 5,
   1782                   'Local variables should never be %s (see '
   1783                   'http://webkit.org/coding/RefPtr.html).' % type_name)
   1784 
   1785 
   1786 def check_for_leaky_patterns(clean_lines, line_number, function_state, error):
   1787     """Check for constructs known to be leak prone.
   1788     Args:
   1789       clean_lines: A CleansedLines instance containing the file.
   1790       line_number: The number of the line to check.
   1791       function_state: Current function name and lines in body so far.
   1792       error: The function to call with any errors found.
   1793     """
   1794     lines = clean_lines.lines
   1795     line = lines[line_number]
   1796 
   1797     matched_get_dc = search(r'\b(?P<function_name>GetDC(Ex)?)\s*\(', line)
   1798     if matched_get_dc:
   1799         error(line_number, 'runtime/leaky_pattern', 5,
   1800               'Use the class HWndDC instead of calling %s to avoid potential '
   1801               'memory leaks.' % matched_get_dc.group('function_name'))
   1802 
   1803     matched_create_dc = search(r'\b(?P<function_name>Create(Compatible)?DC)\s*\(', line)
   1804     matched_own_dc = search(r'\badoptPtr\b', line)
   1805     if matched_create_dc and not matched_own_dc:
   1806         error(line_number, 'runtime/leaky_pattern', 5,
   1807               'Use adoptPtr and OwnPtr<HDC> when calling %s to avoid potential '
   1808               'memory leaks.' % matched_create_dc.group('function_name'))
   1809 
   1810 
   1811 def check_spacing(file_extension, clean_lines, line_number, error):
   1812     """Checks for the correctness of various spacing issues in the code.
   1813 
   1814     Things we check for: spaces around operators, spaces after
   1815     if/for/while/switch, no spaces around parens in function calls, two
   1816     spaces between code and comment, don't start a block with a blank
   1817     line, don't end a function with a blank line, don't have too many
   1818     blank lines in a row.
   1819 
   1820     Args:
   1821       file_extension: The current file extension, without the leading dot.
   1822       clean_lines: A CleansedLines instance containing the file.
   1823       line_number: The number of the line to check.
   1824       error: The function to call with any errors found.
   1825     """
   1826 
   1827     raw = clean_lines.raw_lines
   1828     line = raw[line_number]
   1829 
   1830     # Before nixing comments, check if the line is blank for no good
   1831     # reason.  This includes the first line after a block is opened, and
   1832     # blank lines at the end of a function (ie, right before a line like '}').
   1833     if is_blank_line(line):
   1834         elided = clean_lines.elided
   1835         previous_line = elided[line_number - 1]
   1836         previous_brace = previous_line.rfind('{')
   1837         # FIXME: Don't complain if line before blank line, and line after,
   1838         #        both start with alnums and are indented the same amount.
   1839         #        This ignores whitespace at the start of a namespace block
   1840         #        because those are not usually indented.
   1841         if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
   1842             and previous_line[:previous_brace].find('namespace') == -1):
   1843             # OK, we have a blank line at the start of a code block.  Before we
   1844             # complain, we check if it is an exception to the rule: The previous
   1845             # non-empty line has the parameters of a function header that are indented
   1846             # 4 spaces (because they did not fit in a 80 column line when placed on
   1847             # the same line as the function name).  We also check for the case where
   1848             # the previous line is indented 6 spaces, which may happen when the
   1849             # initializers of a constructor do not fit into a 80 column line.
   1850             exception = False
   1851             if match(r' {6}\w', previous_line):  # Initializer list?
   1852                 # We are looking for the opening column of initializer list, which
   1853                 # should be indented 4 spaces to cause 6 space indentation afterwards.
   1854                 search_position = line_number - 2
   1855                 while (search_position >= 0
   1856                        and match(r' {6}\w', elided[search_position])):
   1857                     search_position -= 1
   1858                 exception = (search_position >= 0
   1859                              and elided[search_position][:5] == '    :')
   1860             else:
   1861                 # Search for the function arguments or an initializer list.  We use a
   1862                 # simple heuristic here: If the line is indented 4 spaces; and we have a
   1863                 # closing paren, without the opening paren, followed by an opening brace
   1864                 # or colon (for initializer lists) we assume that it is the last line of
   1865                 # a function header.  If we have a colon indented 4 spaces, it is an
   1866                 # initializer list.
   1867                 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   1868                                    previous_line)
   1869                              or match(r' {4}:', previous_line))
   1870 
   1871             if not exception:
   1872                 error(line_number, 'whitespace/blank_line', 2,
   1873                       'Blank line at the start of a code block.  Is this needed?')
   1874         # This doesn't ignore whitespace at the end of a namespace block
   1875         # because that is too hard without pairing open/close braces;
   1876         # however, a special exception is made for namespace closing
   1877         # brackets which have a comment containing "namespace".
   1878         #
   1879         # Also, ignore blank lines at the end of a block in a long if-else
   1880         # chain, like this:
   1881         #   if (condition1) {
   1882         #     // Something followed by a blank line
   1883         #
   1884         #   } else if (condition2) {
   1885         #     // Something else
   1886         #   }
   1887         if line_number + 1 < clean_lines.num_lines():
   1888             next_line = raw[line_number + 1]
   1889             if (next_line
   1890                 and match(r'\s*}', next_line)
   1891                 and next_line.find('namespace') == -1
   1892                 and next_line.find('} else ') == -1):
   1893                 error(line_number, 'whitespace/blank_line', 3,
   1894                       'Blank line at the end of a code block.  Is this needed?')
   1895 
   1896     # Next, we check for proper spacing with respect to comments.
   1897     comment_position = line.find('//')
   1898     if comment_position != -1:
   1899         # Check if the // may be in quotes.  If so, ignore it
   1900         # Comparisons made explicit for clarity
   1901         if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
   1902             # Allow one space before end of line comment.
   1903             if (not match(r'^\s*$', line[:comment_position])
   1904                 and (comment_position >= 1
   1905                 and ((line[comment_position - 1] not in string.whitespace)
   1906                      or (comment_position >= 2
   1907                          and line[comment_position - 2] in string.whitespace)))):
   1908                 error(line_number, 'whitespace/comments', 5,
   1909                       'One space before end of line comments')
   1910             # There should always be a space between the // and the comment
   1911             commentend = comment_position + 2
   1912             if commentend < len(line) and not line[commentend] == ' ':
   1913                 # but some lines are exceptions -- e.g. if they're big
   1914                 # comment delimiters like:
   1915                 # //----------------------------------------------------------
   1916                 # or they begin with multiple slashes followed by a space:
   1917                 # //////// Header comment
   1918                 matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
   1919                            or search(r'^/+ ', line[commentend:]))
   1920                 if not matched:
   1921                     error(line_number, 'whitespace/comments', 4,
   1922                           'Should have a space between // and comment')
   1923 
   1924             # There should only be one space after punctuation in a comment.
   1925             if search(r'[.!?,;:]\s\s+\w', line[comment_position:]):
   1926                 error(line_number, 'whitespace/comments', 5,
   1927                       'Should have only a single space after a punctuation in a comment.')
   1928 
   1929     line = clean_lines.elided[line_number]  # get rid of comments and strings
   1930 
   1931     # Don't try to do spacing checks for operator methods
   1932     line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=|/)\(', 'operator\(', line)
   1933     # Don't try to do spacing checks for #include or #import statements at
   1934     # minimum because it messes up checks for spacing around /
   1935     if match(r'\s*#\s*(?:include|import)', line):
   1936         return
   1937     if search(r'[\w.]=[\w.]', line):
   1938         error(line_number, 'whitespace/operators', 4,
   1939               'Missing spaces around =')
   1940 
   1941     # FIXME: It's not ok to have spaces around binary operators like .
   1942 
   1943     # You should always have whitespace around binary operators.
   1944     # Alas, we can't test < or > because they're legitimately used sans spaces
   1945     # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
   1946     # only if it's not template params list spilling into the next line.
   1947     matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
   1948     if not matched:
   1949         # Note that while it seems that the '<[^<]*' term in the following
   1950         # regexp could be simplified to '<.*', which would indeed match
   1951         # the same class of strings, the [^<] means that searching for the
   1952         # regexp takes linear rather than quadratic time.
   1953         if not search(r'<[^<]*,\s*$', line):  # template params spill
   1954             matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
   1955     if matched:
   1956         error(line_number, 'whitespace/operators', 3,
   1957               'Missing spaces around %s' % matched.group(1))
   1958 
   1959     # There shouldn't be space around unary operators
   1960     matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   1961     if matched:
   1962         error(line_number, 'whitespace/operators', 4,
   1963               'Extra space for operator %s' % matched.group(1))
   1964 
   1965     # A pet peeve of mine: no spaces after an if, while, switch, or for
   1966     matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
   1967     if matched:
   1968         error(line_number, 'whitespace/parens', 5,
   1969               'Missing space before ( in %s' % matched.group(1))
   1970 
   1971     # For if/for/foreach/while/switch, the left and right parens should be
   1972     # consistent about how many spaces are inside the parens, and
   1973     # there should either be zero or one spaces inside the parens.
   1974     # We don't want: "if ( foo)" or "if ( foo   )".
   1975     # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   1976     matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
   1977     if matched:
   1978         statement = matched.group('statement')
   1979         condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
   1980         if condition is not None:
   1981             condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
   1982             if condition_match:
   1983                 n_leading = len(condition_match.group('leading'))
   1984                 n_trailing = len(condition_match.group('trailing'))
   1985                 if n_leading != 0:
   1986                     for_exception = statement == 'for' and condition.startswith(' ;')
   1987                     if not for_exception:
   1988                         error(line_number, 'whitespace/parens', 5,
   1989                               'Extra space after ( in %s' % statement)
   1990                 if n_trailing != 0:
   1991                     for_exception = statement == 'for' and condition.endswith('; ')
   1992                     if not for_exception:
   1993                         error(line_number, 'whitespace/parens', 5,
   1994                               'Extra space before ) in %s' % statement)
   1995 
   1996             # Do not check for more than one command in macros
   1997             in_preprocessor_directive = match(r'\s*#', line)
   1998             if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
   1999                 error(line_number, 'whitespace/parens', 4,
   2000                       'More than one command on the same line in %s' % statement)
   2001 
   2002     # You should always have a space after a comma (either as fn arg or operator)
   2003     if search(r',[^\s]', line):
   2004         error(line_number, 'whitespace/comma', 3,
   2005               'Missing space after ,')
   2006 
   2007     matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
   2008     if matched:
   2009         error(line_number, 'whitespace/declaration', 3,
   2010               'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))
   2011 
   2012     if file_extension == 'cpp':
   2013         # C++ should have the & or * beside the type not the variable name.
   2014         matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
   2015         if matched:
   2016             error(line_number, 'whitespace/declaration', 3,
   2017                   'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
   2018 
   2019     elif file_extension == 'c':
   2020         # C Pointer declaration should have the * beside the variable not the type name.
   2021         matched = search(r'^\s*\w+\*\s+\w+', line)
   2022         if matched:
   2023             error(line_number, 'whitespace/declaration', 3,
   2024                   'Declaration has space between * and variable name in %s' % matched.group(0).strip())
   2025 
   2026     # Next we will look for issues with function calls.
   2027     check_spacing_for_function_call(line, line_number, error)
   2028 
   2029     # Except after an opening paren, you should have spaces before your braces.
   2030     # And since you should never have braces at the beginning of a line, this is
   2031     # an easy test.
   2032     if search(r'[^ ({]{', line):
   2033         error(line_number, 'whitespace/braces', 5,
   2034               'Missing space before {')
   2035 
   2036     # Make sure '} else {' has spaces.
   2037     if search(r'}else', line):
   2038         error(line_number, 'whitespace/braces', 5,
   2039               'Missing space before else')
   2040 
   2041     # You shouldn't have spaces before your brackets, except maybe after
   2042     # 'delete []' or 'new char * []'.
   2043     if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
   2044         error(line_number, 'whitespace/braces', 5,
   2045               'Extra space before [')
   2046 
   2047     # There should always be a single space in between braces on the same line.
   2048     if search(r'\{\}', line):
   2049         error(line_number, 'whitespace/braces', 5, 'Missing space inside { }.')
   2050     if search(r'\{\s\s+\}', line):
   2051         error(line_number, 'whitespace/braces', 5, 'Too many spaces inside { }.')
   2052 
   2053     # You shouldn't have a space before a semicolon at the end of the line.
   2054     # There's a special case for "for" since the style guide allows space before
   2055     # the semicolon there.
   2056     if search(r':\s*;\s*$', line):
   2057         error(line_number, 'whitespace/semicolon', 5,
   2058               'Semicolon defining empty statement. Use { } instead.')
   2059     elif search(r'^\s*;\s*$', line):
   2060         error(line_number, 'whitespace/semicolon', 5,
   2061               'Line contains only semicolon. If this should be an empty statement, '
   2062               'use { } instead.')
   2063     elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
   2064         error(line_number, 'whitespace/semicolon', 5,
   2065               'Extra space before last semicolon. If this should be an empty '
   2066               'statement, use { } instead.')
   2067     elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
   2068           and line.count('(') == line.count(')')
   2069           # Allow do {} while();
   2070           and not search(r'}\s*while', line)):
   2071         error(line_number, 'whitespace/semicolon', 5,
   2072               'Semicolon defining empty statement for this loop. Use { } instead.')
   2073 
   2074 
   2075 def get_previous_non_blank_line(clean_lines, line_number):
   2076     """Return the most recent non-blank line and its line number.
   2077 
   2078     Args:
   2079       clean_lines: A CleansedLines instance containing the file contents.
   2080       line_number: The number of the line to check.
   2081 
   2082     Returns:
   2083       A tuple with two elements.  The first element is the contents of the last
   2084       non-blank line before the current line, or the empty string if this is the
   2085       first non-blank line.  The second is the line number of that line, or -1
   2086       if this is the first non-blank line.
   2087     """
   2088 
   2089     previous_line_number = line_number - 1
   2090     while previous_line_number >= 0:
   2091         previous_line = clean_lines.elided[previous_line_number]
   2092         if not is_blank_line(previous_line):     # if not a blank line...
   2093             return (previous_line, previous_line_number)
   2094         previous_line_number -= 1
   2095     return ('', -1)
   2096 
   2097 
   2098 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
   2099     """Looks for indentation errors inside of namespaces.
   2100 
   2101     Args:
   2102       clean_lines: A CleansedLines instance containing the file.
   2103       line_number: The number of the line to check.
   2104       file_extension: The extension (dot not included) of the file.
   2105       file_state: A _FileState instance which maintains information about
   2106                   the state of things in the file.
   2107       error: The function to call with any errors found.
   2108     """
   2109 
   2110     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2111 
   2112     namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
   2113     if not namespace_match:
   2114         return
   2115 
   2116     current_indentation_level = len(namespace_match.group('namespace_indentation'))
   2117     if current_indentation_level > 0:
   2118         # Don't warn about an indented namespace if we already warned about indented code.
   2119         if not file_state.did_inside_namespace_indent_warning():
   2120             error(line_number, 'whitespace/indent', 4,
   2121                   'namespace should never be indented.')
   2122         return
   2123     looking_for_semicolon = False;
   2124     line_offset = 0
   2125     in_preprocessor_directive = False;
   2126     for current_line in clean_lines.elided[line_number + 1:]:
   2127         line_offset += 1
   2128         if not current_line.strip():
   2129             continue
   2130         if not current_indentation_level:
   2131             if not (in_preprocessor_directive or looking_for_semicolon):
   2132                 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
   2133                     file_state.set_did_inside_namespace_indent_warning()
   2134                     error(line_number + line_offset, 'whitespace/indent', 4,
   2135                           'Code inside a namespace should not be indented.')
   2136             if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax.
   2137                 in_preprocessor_directive = current_line[-1] == '\\'
   2138             else:
   2139                 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\')
   2140         else:
   2141             looking_for_semicolon = False; # If we have a brace we may not need a semicolon.
   2142         current_indentation_level += current_line.count('{') - current_line.count('}')
   2143         if current_indentation_level < 0:
   2144             break;
   2145 
   2146 
   2147 def check_enum_casing(clean_lines, line_number, enum_state, error):
   2148     """Looks for incorrectly named enum values.
   2149 
   2150     Args:
   2151       clean_lines: A CleansedLines instance containing the file.
   2152       line_number: The number of the line to check.
   2153       enum_state: A _EnumState instance which maintains enum declaration state.
   2154       error: The function to call with any errors found.
   2155     """
   2156 
   2157     enum_state.is_webidl_enum |= bool(match(r'\s*// Web(?:Kit)?IDL enum\s*$', clean_lines.raw_lines[line_number]))
   2158 
   2159     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2160     if not enum_state.process_clean_line(line):
   2161         error(line_number, 'readability/enum_casing', 4,
   2162               'enum members should use InterCaps with an initial capital letter.')
   2163 
   2164 def check_directive_indentation(clean_lines, line_number, file_state, error):
   2165     """Looks for indentation of preprocessor directives.
   2166 
   2167     Args:
   2168       clean_lines: A CleansedLines instance containing the file.
   2169       line_number: The number of the line to check.
   2170       file_state: A _FileState instance which maintains information about
   2171                   the state of things in the file.
   2172       error: The function to call with any errors found.
   2173     """
   2174 
   2175     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2176 
   2177     indented_preprocessor_directives = match(r'\s+#', line)
   2178     if not indented_preprocessor_directives:
   2179         return
   2180 
   2181     error(line_number, 'whitespace/indent', 4, 'preprocessor directives (e.g., #ifdef, #define, #import) should never be indented.')
   2182 
   2183 
   2184 def get_initial_spaces_for_line(clean_line):
   2185     initial_spaces = 0
   2186     while initial_spaces < len(clean_line) and clean_line[initial_spaces] == ' ':
   2187         initial_spaces += 1
   2188     return initial_spaces
   2189 
   2190 
   2191 def check_indentation_amount(clean_lines, line_number, error):
   2192     line = clean_lines.elided[line_number]
   2193     initial_spaces = get_initial_spaces_for_line(line)
   2194 
   2195     if initial_spaces % 4:
   2196         error(line_number, 'whitespace/indent', 3,
   2197               'Weird number of spaces at line-start.  Are you using a 4-space indent?')
   2198         return
   2199 
   2200     previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2201     if not previous_line.strip() or match(r'\s*\w+\s*:\s*$', previous_line) or previous_line[0] == '#':
   2202         return
   2203 
   2204     previous_line_initial_spaces = get_initial_spaces_for_line(previous_line)
   2205     if initial_spaces > previous_line_initial_spaces + 4:
   2206         error(line_number, 'whitespace/indent', 3, 'When wrapping a line, only indent 4 spaces.')
   2207 
   2208 
   2209 def check_using_std(clean_lines, line_number, file_state, error):
   2210     """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'.
   2211 
   2212     Args:
   2213       clean_lines: A CleansedLines instance containing the file.
   2214       line_number: The number of the line to check.
   2215       file_state: A _FileState instance which maintains information about
   2216                   the state of things in the file.
   2217       error: The function to call with any errors found.
   2218     """
   2219 
   2220     # This check doesn't apply to C or Objective-C implementation files.
   2221     if file_state.is_c_or_objective_c():
   2222         return
   2223 
   2224     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2225 
   2226     using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line)
   2227     if not using_std_match:
   2228         return
   2229 
   2230     method_name = using_std_match.group('method_name')
   2231     error(line_number, 'build/using_std', 4,
   2232           "Use 'using namespace std;' instead of 'using std::%s;'." % method_name)
   2233 
   2234 
   2235 def check_max_min_macros(clean_lines, line_number, file_state, error):
   2236     """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min().
   2237 
   2238     Args:
   2239       clean_lines: A CleansedLines instance containing the file.
   2240       line_number: The number of the line to check.
   2241       file_state: A _FileState instance which maintains information about
   2242                   the state of things in the file.
   2243       error: The function to call with any errors found.
   2244     """
   2245 
   2246     # This check doesn't apply to C or Objective-C implementation files.
   2247     if file_state.is_c_or_objective_c():
   2248         return
   2249 
   2250     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2251 
   2252     max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line)
   2253     if not max_min_macros_search:
   2254         return
   2255 
   2256     max_min_macro = max_min_macros_search.group('max_min_macro')
   2257     max_min_macro_lower = max_min_macro.lower()
   2258     error(line_number, 'runtime/max_min_macros', 4,
   2259           'Use std::%s() or std::%s<type>() instead of the %s() macro.'
   2260           % (max_min_macro_lower, max_min_macro_lower, max_min_macro))
   2261 
   2262 
   2263 def check_ctype_functions(clean_lines, line_number, file_state, error):
   2264     """Looks for use of the standard functions in ctype.h and suggest they be replaced
   2265        by use of equivilent ones in <wtf/ASCIICType.h>?.
   2266 
   2267     Args:
   2268       clean_lines: A CleansedLines instance containing the file.
   2269       line_number: The number of the line to check.
   2270       file_state: A _FileState instance which maintains information about
   2271                   the state of things in the file.
   2272       error: The function to call with any errors found.
   2273     """
   2274 
   2275     line = clean_lines.elided[line_number]  # Get rid of comments and strings.
   2276 
   2277     ctype_function_search = search(r'\b(?P<ctype_function>(isalnum|isalpha|isascii|isblank|iscntrl|isdigit|isgraph|islower|isprint|ispunct|isspace|isupper|isxdigit|toascii|tolower|toupper))\s*\(', line)
   2278     if not ctype_function_search:
   2279         return
   2280 
   2281     ctype_function = ctype_function_search.group('ctype_function')
   2282     error(line_number, 'runtime/ctype_function', 4,
   2283           'Use equivelent function in <wtf/ASCIICType.h> instead of the %s() function.'
   2284           % (ctype_function))
   2285 
   2286 def check_switch_indentation(clean_lines, line_number, error):
   2287     """Looks for indentation errors inside of switch statements.
   2288 
   2289     Args:
   2290       clean_lines: A CleansedLines instance containing the file.
   2291       line_number: The number of the line to check.
   2292       error: The function to call with any errors found.
   2293     """
   2294 
   2295     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2296 
   2297     switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
   2298     if not switch_match:
   2299         return
   2300 
   2301     switch_indentation = switch_match.group('switch_indentation')
   2302     inner_indentation = switch_indentation + ' ' * 4
   2303     line_offset = 0
   2304     encountered_nested_switch = False
   2305 
   2306     for current_line in clean_lines.elided[line_number + 1:]:
   2307         line_offset += 1
   2308 
   2309         # Skip not only empty lines but also those with preprocessor directives.
   2310         if current_line.strip() == '' or current_line.startswith('#'):
   2311             continue
   2312 
   2313         if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line):
   2314             # Complexity alarm - another switch statement nested inside the one
   2315             # that we're currently testing. We'll need to track the extent of
   2316             # that inner switch if the upcoming label tests are still supposed
   2317             # to work correctly. Let's not do that; instead, we'll finish
   2318             # checking this line, and then leave it like that. Assuming the
   2319             # indentation is done consistently (even if incorrectly), this will
   2320             # still catch all indentation issues in practice.
   2321             encountered_nested_switch = True
   2322 
   2323         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2324         current_indentation = current_indentation_match.group('indentation')
   2325         remaining_line = current_indentation_match.group('remaining_line')
   2326 
   2327         # End the check at the end of the switch statement.
   2328         if remaining_line.startswith('}') and current_indentation == switch_indentation:
   2329             break
   2330         # Case and default branches should not be indented. The regexp also
   2331         # catches single-line cases like "default: break;" but does not trigger
   2332         # on stuff like "Document::Foo();".
   2333         elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
   2334             if current_indentation != switch_indentation:
   2335                 error(line_number + line_offset, 'whitespace/indent', 4,
   2336                       'A case label should not be indented, but line up with its switch statement.')
   2337                 # Don't throw an error for multiple badly indented labels,
   2338                 # one should be enough to figure out the problem.
   2339                 break
   2340         # We ignore goto labels at the very beginning of a line.
   2341         elif match(r'\w+\s*:\s*$', remaining_line):
   2342             continue
   2343         # It's not a goto label, so check if it's indented at least as far as
   2344         # the switch statement plus one more level of indentation.
   2345         elif not current_indentation.startswith(inner_indentation):
   2346             error(line_number + line_offset, 'whitespace/indent', 4,
   2347                   'Non-label code inside switch statements should be indented.')
   2348             # Don't throw an error for multiple badly indented statements,
   2349             # one should be enough to figure out the problem.
   2350             break
   2351 
   2352         if encountered_nested_switch:
   2353             break
   2354 
   2355 
   2356 def check_braces(clean_lines, line_number, error):
   2357     """Looks for misplaced braces (e.g. at the end of line).
   2358 
   2359     Args:
   2360       clean_lines: A CleansedLines instance containing the file.
   2361       line_number: The number of the line to check.
   2362       error: The function to call with any errors found.
   2363     """
   2364 
   2365     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2366 
   2367     if match(r'\s*{\s*$', line):
   2368         # We allow an open brace to start a line in the case where someone
   2369         # is using braces for function definition or in a block to
   2370         # explicitly create a new scope, which is commonly used to control
   2371         # the lifetime of stack-allocated variables.  We don't detect this
   2372         # perfectly: we just don't complain if the last non-whitespace
   2373         # character on the previous non-blank line is ';', ':', '{', '}',
   2374         # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
   2375         # We also allow '#' for #endif and '=' for array initialization.
   2376         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2377         if ((not search(r'[;:}{)=]\s*$|\)\s*((const|OVERRIDE)\s*)*\s*$', previous_line)
   2378              or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
   2379             and previous_line.find('#') < 0):
   2380             error(line_number, 'whitespace/braces', 4,
   2381                   'This { should be at the end of the previous line')
   2382     elif (search(r'\)\s*(((const|OVERRIDE)\s*)*\s*)?{\s*$', line)
   2383           and line.count('(') == line.count(')')
   2384           and not search(r'\b(if|for|foreach|while|switch)\b', line)
   2385           and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
   2386         error(line_number, 'whitespace/braces', 4,
   2387               'Place brace on its own line for function definitions.')
   2388 
   2389     # An else clause should be on the same line as the preceding closing brace.
   2390     if match(r'\s*else\s*', line):
   2391         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   2392         if match(r'\s*}\s*$', previous_line):
   2393             error(line_number, 'whitespace/newline', 4,
   2394                   'An else should appear on the same line as the preceding }')
   2395 
   2396     # Likewise, an else should never have the else clause on the same line
   2397     if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
   2398         error(line_number, 'whitespace/newline', 4,
   2399               'Else clause should never be on same line as else (use 2 lines)')
   2400 
   2401     # In the same way, a do/while should never be on one line
   2402     if match(r'\s*do [^\s{]', line):
   2403         error(line_number, 'whitespace/newline', 4,
   2404               'do/while clauses should not be on a single line')
   2405 
   2406     # Braces shouldn't be followed by a ; unless they're defining a struct
   2407     # or initializing an array.
   2408     # We can't tell in general, but we can for some common cases.
   2409     previous_line_number = line_number
   2410     while True:
   2411         (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
   2412         if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
   2413             line = previous_line + line
   2414         else:
   2415             break
   2416     if (search(r'{.*}\s*;', line)
   2417         and line.count('{') == line.count('}')
   2418         and not search(r'struct|class|enum|\s*=\s*{', line)):
   2419         error(line_number, 'readability/braces', 4,
   2420               "You don't need a ; after a }")
   2421 
   2422 
   2423 def check_exit_statement_simplifications(clean_lines, line_number, error):
   2424     """Looks for else or else-if statements that should be written as an
   2425     if statement when the prior if concludes with a return, break, continue or
   2426     goto statement.
   2427 
   2428     Args:
   2429       clean_lines: A CleansedLines instance containing the file.
   2430       line_number: The number of the line to check.
   2431       error: The function to call with any errors found.
   2432     """
   2433 
   2434     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   2435 
   2436     else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
   2437     if not else_match:
   2438         return
   2439 
   2440     else_indentation = else_match.group('else_indentation')
   2441     inner_indentation = else_indentation + ' ' * 4
   2442 
   2443     previous_lines = clean_lines.elided[:line_number]
   2444     previous_lines.reverse()
   2445     line_offset = 0
   2446     encountered_exit_statement = False
   2447 
   2448     for current_line in previous_lines:
   2449         line_offset -= 1
   2450 
   2451         # Skip not only empty lines but also those with preprocessor directives
   2452         # and goto labels.
   2453         if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line):
   2454             continue
   2455 
   2456         # Skip lines with closing braces on the original indentation level.
   2457         # Even though the styleguide says they should be on the same line as
   2458         # the "else if" statement, we also want to check for instances where
   2459         # the current code does not comply with the coding style. Thus, ignore
   2460         # these lines and proceed to the line before that.
   2461         if current_line == else_indentation + '}':
   2462             continue
   2463 
   2464         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   2465         current_indentation = current_indentation_match.group('indentation')
   2466         remaining_line = current_indentation_match.group('remaining_line')
   2467 
   2468         # As we're going up the lines, the first real statement to encounter
   2469         # has to be an exit statement (return, break, continue or goto) -
   2470         # otherwise, this check doesn't apply.
   2471         if not encountered_exit_statement:
   2472             # We only want to find exit statements if they are on exactly
   2473             # the same level of indentation as expected from the code inside
   2474             # the block. If the indentation doesn't strictly match then we
   2475             # might have a nested if or something, which must be ignored.
   2476             if current_indentation != inner_indentation:
   2477                 break
   2478             if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line):
   2479                 encountered_exit_statement = True
   2480                 continue
   2481             break
   2482 
   2483         # When code execution reaches this point, we've found an exit statement
   2484         # as last statement of the previous block. Now we only need to make
   2485         # sure that the block belongs to an "if", then we can throw an error.
   2486 
   2487         # Skip lines with opening braces on the original indentation level,
   2488         # similar to the closing braces check above. ("if (condition)\n{")
   2489         if current_line == else_indentation + '{':
   2490             continue
   2491 
   2492         # Skip everything that's further indented than our "else" or "else if".
   2493         if current_indentation.startswith(else_indentation) and current_indentation != else_indentation:
   2494             continue
   2495 
   2496         # So we've got a line with same (or less) indentation. Is it an "if"?
   2497         # If yes: throw an error. If no: don't throw an error.
   2498         # Whatever the outcome, this is the end of our loop.
   2499         if match(r'if\s*\(', remaining_line):
   2500             if else_match.start('else') != -1:
   2501                 error(line_number + line_offset, 'readability/control_flow', 4,
   2502                       'An else statement can be removed when the prior "if" '
   2503                       'concludes with a return, break, continue or goto statement.')
   2504             else:
   2505                 error(line_number + line_offset, 'readability/control_flow', 4,
   2506                       'An else if statement should be written as an if statement '
   2507                       'when the prior "if" concludes with a return, break, '
   2508                       'continue or goto statement.')
   2509         break
   2510 
   2511 
   2512 def replaceable_check(operator, macro, line):
   2513     """Determine whether a basic CHECK can be replaced with a more specific one.
   2514 
   2515     For example suggest using CHECK_EQ instead of CHECK(a == b) and
   2516     similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   2517 
   2518     Args:
   2519       operator: The C++ operator used in the CHECK.
   2520       macro: The CHECK or EXPECT macro being called.
   2521       line: The current source line.
   2522 
   2523     Returns:
   2524       True if the CHECK can be replaced with a more specific one.
   2525     """
   2526 
   2527     # This matches decimal and hex integers, strings, and chars (in that order).
   2528     match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   2529 
   2530     # Expression to match two sides of the operator with something that
   2531     # looks like a literal, since CHECK(x == iterator) won't compile.
   2532     # This means we can't catch all the cases where a more specific
   2533     # CHECK is possible, but it's less annoying than dealing with
   2534     # extraneous warnings.
   2535     match_this = (r'\s*' + macro + r'\((\s*' +
   2536                   match_constant + r'\s*' + operator + r'[^<>].*|'
   2537                   r'.*[^<>]' + operator + r'\s*' + match_constant +
   2538                   r'\s*\))')
   2539 
   2540     # Don't complain about CHECK(x == NULL) or similar because
   2541     # CHECK_EQ(x, NULL) won't compile (requires a cast).
   2542     # Also, don't complain about more complex boolean expressions
   2543     # involving && or || such as CHECK(a == b || c == d).
   2544     return match(match_this, line) and not search(r'NULL|&&|\|\|', line)
   2545 
   2546 
   2547 def check_check(clean_lines, line_number, error):
   2548     """Checks the use of CHECK and EXPECT macros.
   2549 
   2550     Args:
   2551       clean_lines: A CleansedLines instance containing the file.
   2552       line_number: The number of the line to check.
   2553       error: The function to call with any errors found.
   2554     """
   2555 
   2556     # Decide the set of replacement macros that should be suggested
   2557     raw_lines = clean_lines.raw_lines
   2558     current_macro = ''
   2559     for macro in _CHECK_MACROS:
   2560         if raw_lines[line_number].find(macro) >= 0:
   2561             current_macro = macro
   2562             break
   2563     if not current_macro:
   2564         # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   2565         return
   2566 
   2567     line = clean_lines.elided[line_number]        # get rid of comments and strings
   2568 
   2569     # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   2570     for operator in ['==', '!=', '>=', '>', '<=', '<']:
   2571         if replaceable_check(operator, current_macro, line):
   2572             error(line_number, 'readability/check', 2,
   2573                   'Consider using %s instead of %s(a %s b)' % (
   2574                       _CHECK_REPLACEMENT[current_macro][operator],
   2575                       current_macro, operator))
   2576             break
   2577 
   2578 
   2579 def check_for_comparisons_to_zero(clean_lines, line_number, error):
   2580     # Get the line without comments and strings.
   2581     line = clean_lines.elided[line_number]
   2582 
   2583     # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
   2584     if search(r'[=!]=\s*(NULL|0|true|false)[^\w.]', line) or search(r'[^\w.](NULL|0|true|false)\s*[=!]=', line):
   2585         if not search('LIKELY', line) and not search('UNLIKELY', line):
   2586             error(line_number, 'readability/comparison_to_zero', 5,
   2587                   'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
   2588 
   2589 
   2590 def check_for_null(clean_lines, line_number, file_state, error):
   2591     # This check doesn't apply to C or Objective-C implementation files.
   2592     if file_state.is_c_or_objective_c():
   2593         return
   2594 
   2595     line = clean_lines.elided[line_number]
   2596 
   2597     # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372.
   2598     if search(r'\bg(_[a-z]+)+\b', line):
   2599         return
   2600 
   2601     # Don't warn about NULL usage in gst_*(). See Bug 70498.
   2602     if search(r'\bgst(_[a-z]+)+\b', line):
   2603         return
   2604 
   2605     # Don't warn about NULL usage in gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090.
   2606     if search(r'\bgdk_pixbuf_save_to\w+\b', line):
   2607         return
   2608 
   2609     # Don't warn about NULL usage in gtk_widget_style_get(), gtk_style_context_get_style(), or gtk_style_context_get(). See Bug 51758
   2610     if search(r'\bgtk_widget_style_get\(\w+\b', line) or search(r'\bgtk_style_context_get_style\(\w+\b', line) or search(r'\bgtk_style_context_get\(\w+\b', line):
   2611         return
   2612 
   2613     # Don't warn about NULL usage in soup_server_new(). See Bug 77890.
   2614     if search(r'\bsoup_server_new\(\w+\b', line):
   2615         return
   2616 
   2617     if search(r'\bNULL\b', line):
   2618         error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
   2619         return
   2620 
   2621     line = clean_lines.raw_lines[line_number]
   2622     # See if NULL occurs in any comments in the line. If the search for NULL using the raw line
   2623     # matches, then do the check with strings collapsed to avoid giving errors for
   2624     # NULLs occurring in strings.
   2625     if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)):
   2626         error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).')
   2627 
   2628 def get_line_width(line):
   2629     """Determines the width of the line in column positions.
   2630 
   2631     Args:
   2632       line: A string, which may be a Unicode string.
   2633 
   2634     Returns:
   2635       The width of the line in column positions, accounting for Unicode
   2636       combining characters and wide characters.
   2637     """
   2638     if isinstance(line, unicode):
   2639         width = 0
   2640         for c in unicodedata.normalize('NFC', line):
   2641             if unicodedata.east_asian_width(c) in ('W', 'F'):
   2642                 width += 2
   2643             elif not unicodedata.combining(c):
   2644                 width += 1
   2645         return width
   2646     return len(line)
   2647 
   2648 
   2649 def check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line_number, error):
   2650     """Scans the bodies of conditionals and loops, and in particular
   2651     all the arms of conditionals, for violations in the use of braces.
   2652 
   2653     Specifically:
   2654 
   2655     (1) If an arm omits braces, then the following statement must be on one
   2656     physical line.
   2657     (2) If any arm uses braces, all arms must use them.
   2658 
   2659     These checks are only done here if we find the start of an
   2660     'if/for/foreach/while' statement, because this function fails fast
   2661     if it encounters constructs it doesn't understand. Checks
   2662     elsewhere validate other constraints, such as requiring '}' and
   2663     'else' to be on the same line.
   2664 
   2665     Args:
   2666       clean_lines: A CleansedLines instance containing the file.
   2667       line_number: The number of the line to check.
   2668       error: The function to call with any errors found.
   2669     """
   2670 
   2671     # We work with the elided lines. Comments have been removed, but line
   2672     # numbers are preserved, so we can still find situations where
   2673     # single-expression control clauses span multiple lines, or when a
   2674     # comment preceded the expression.
   2675     lines = clean_lines.elided
   2676     line = lines[line_number]
   2677 
   2678     # Match control structures.
   2679     control_match = match(r'\s*(if|foreach|for|while)\s*\(', line)
   2680     if not control_match:
   2681         return
   2682 
   2683     # Found the start of a conditional or loop.
   2684 
   2685     # The following loop handles all potential arms of the control clause.
   2686     # The initial conditions are the following:
   2687     #   - We start on the opening paren '(' of the condition, *unless* we are
   2688     #     handling an 'else' block, in which case there is no condition.
   2689     #   - In the latter case, we start at the position just beyond the 'else'
   2690     #     token.
   2691     expect_conditional_expression = True
   2692     know_whether_using_braces = False
   2693     using_braces = False
   2694     search_for_else_clause = control_match.group(1) == "if"
   2695     current_pos = Position(line_number, control_match.end() - 1)
   2696 
   2697     while True:
   2698         if expect_conditional_expression:
   2699             # Try to find the end of the conditional expression,
   2700             # potentially spanning multiple lines.
   2701             open_paren_pos = current_pos
   2702             close_paren_pos = close_expression(lines, open_paren_pos)
   2703             if close_paren_pos.column < 0:
   2704                 return
   2705             current_pos = close_paren_pos
   2706 
   2707         end_line_of_conditional = current_pos.row
   2708 
   2709         # Find the start of the body.
   2710         current_pos = _find_in_lines(r'\S', lines, current_pos, None)
   2711         if not current_pos:
   2712             return
   2713 
   2714         current_arm_uses_brace = False
   2715         if lines[current_pos.row][current_pos.column] == '{':
   2716             current_arm_uses_brace = True
   2717         if know_whether_using_braces:
   2718             if using_braces != current_arm_uses_brace:
   2719                 error(current_pos.row, 'whitespace/braces', 4,
   2720                       'If one part of an if-else statement uses curly braces, the other part must too.')
   2721                 return
   2722         know_whether_using_braces = True
   2723         using_braces = current_arm_uses_brace
   2724 
   2725         if using_braces:
   2726             # Skip over the entire arm.
   2727             current_pos = close_expression(lines, current_pos)
   2728             if current_pos.column < 0:
   2729                 return
   2730         else:
   2731             # Skip over the current expression.
   2732             current_line_number = current_pos.row
   2733             current_pos = _find_in_lines(r';', lines, current_pos, None)
   2734             if not current_pos:
   2735                 return
   2736             # If the end of the expression is beyond the line just after
   2737             # the close parenthesis or control clause, we've found a
   2738             # single-expression arm that spans multiple lines. (We don't
   2739             # fire this error for expressions ending on the same line; that
   2740             # is a different error, handled elsewhere.)
   2741             if current_pos.row > 1 + end_line_of_conditional:
   2742                 error(current_pos.row, 'whitespace/braces', 4,
   2743                       'A conditional or loop body must use braces if the statement is more than one line long.')
   2744                 return
   2745             current_pos = Position(current_pos.row, 1 + current_pos.column)
   2746 
   2747         # At this point current_pos points just past the end of the last
   2748         # arm. If we just handled the last control clause, we're done.
   2749         if not search_for_else_clause:
   2750             return
   2751 
   2752         # Scan forward for the next non-whitespace character, and see
   2753         # whether we are continuing a conditional (with an 'else' or
   2754         # 'else if'), or are done.
   2755         current_pos = _find_in_lines(r'\S', lines, current_pos, None)
   2756         if not current_pos:
   2757             return
   2758         next_nonspace_string = lines[current_pos.row][current_pos.column:]
   2759         next_conditional = match(r'(else\s*if|else)', next_nonspace_string)
   2760         if not next_conditional:
   2761             # Done processing this 'if' and all arms.
   2762             return
   2763         if next_conditional.group(1) == "else if":
   2764             current_pos = _find_in_lines(r'\(', lines, current_pos, None)
   2765         else:
   2766             current_pos.column += 4  # skip 'else'
   2767             expect_conditional_expression = False
   2768             search_for_else_clause = False
   2769     # End while loop
   2770 
   2771 def check_style(clean_lines, line_number, file_extension, class_state, file_state, enum_state, error):
   2772     """Checks rules from the 'C++ style rules' section of cppguide.html.
   2773 
   2774     Most of these rules are hard to test (naming, comment style), but we
   2775     do what we can.  In particular we check for 4-space indents, line lengths,
   2776     tab usage, spaces inside code, etc.
   2777 
   2778     Args:
   2779       clean_lines: A CleansedLines instance containing the file.
   2780       line_number: The number of the line to check.
   2781       file_extension: The extension (without the dot) of the filename.
   2782       class_state: A _ClassState instance which maintains information about
   2783                    the current stack of nested class declarations being parsed.
   2784       file_state: A _FileState instance which maintains information about
   2785                   the state of things in the file.
   2786       enum_state: A _EnumState instance which maintains the current enum state.
   2787       error: The function to call with any errors found.
   2788     """
   2789 
   2790     raw_lines = clean_lines.raw_lines
   2791     line = raw_lines[line_number]
   2792 
   2793     if line.find('\t') != -1:
   2794         error(line_number, 'whitespace/tab', 1,
   2795               'Tab found; better to use spaces')
   2796 
   2797     cleansed_line = clean_lines.elided[line_number]
   2798     if line and line[-1].isspace():
   2799         error(line_number, 'whitespace/end_of_line', 4,
   2800               'Line ends in whitespace.  Consider deleting these extra spaces.')
   2801 
   2802     if (cleansed_line.count(';') > 1
   2803         # for loops are allowed two ;'s (and may run over two lines).
   2804         and cleansed_line.find('for') == -1
   2805         and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1
   2806              or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1)
   2807         # It's ok to have many commands in a switch case that fits in 1 line
   2808         and not ((cleansed_line.find('case ') != -1
   2809                   or cleansed_line.find('default:') != -1)
   2810                  and cleansed_line.find('break;') != -1)
   2811         # Also it's ok to have many commands in trivial single-line accessors in class definitions.
   2812         and not (match(r'.*\(.*\).*{.*.}', line)
   2813                  and class_state.classinfo_stack
   2814                  and line.count('{') == line.count('}'))
   2815         and not cleansed_line.startswith('#define ')
   2816         # It's ok to use use WTF_MAKE_NONCOPYABLE and WTF_MAKE_FAST_ALLOCATED macros in 1 line
   2817         and not (cleansed_line.find("WTF_MAKE_NONCOPYABLE") != -1
   2818                  and cleansed_line.find("WTF_MAKE_FAST_ALLOCATED") != -1)):
   2819         error(line_number, 'whitespace/newline', 4,
   2820               'More than one command on the same line')
   2821 
   2822     if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'):
   2823         error(line_number, 'whitespace/operators', 4,
   2824               'Boolean expressions that span multiple lines should have their '
   2825               'operators on the left side of the line instead of the right side.')
   2826 
   2827     # Some more style checks
   2828     check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
   2829     check_directive_indentation(clean_lines, line_number, file_state, error)
   2830     check_using_std(clean_lines, line_number, file_state, error)
   2831     check_max_min_macros(clean_lines, line_number, file_state, error)
   2832     check_ctype_functions(clean_lines, line_number, file_state, error)
   2833     check_switch_indentation(clean_lines, line_number, error)
   2834     check_braces(clean_lines, line_number, error)
   2835     check_exit_statement_simplifications(clean_lines, line_number, error)
   2836     check_spacing(file_extension, clean_lines, line_number, error)
   2837     check_check(clean_lines, line_number, error)
   2838     check_for_comparisons_to_zero(clean_lines, line_number, error)
   2839     check_for_null(clean_lines, line_number, file_state, error)
   2840     check_indentation_amount(clean_lines, line_number, error)
   2841     check_enum_casing(clean_lines, line_number, enum_state, error)
   2842 
   2843 
# Matches a quoted #include of a ".h" file in which no '/' appears between the
# opening quote and the ".h" extension.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line. Group 1 is the opening delimiter ('<' or '"')
# and group 2 is the included path.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2852 
   2853 
   2854 def _drop_common_suffixes(filename):
   2855     """Drops common suffixes like _test.cpp or -inl.h from filename.
   2856 
   2857     For example:
   2858       >>> _drop_common_suffixes('foo/foo-inl.h')
   2859       'foo/foo'
   2860       >>> _drop_common_suffixes('foo/bar/foo.cpp')
   2861       'foo/bar/foo'
   2862       >>> _drop_common_suffixes('foo/foo_internal.h')
   2863       'foo/foo'
   2864       >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
   2865       'foo/foo_unusualinternal'
   2866 
   2867     Args:
   2868       filename: The input filename.
   2869 
   2870     Returns:
   2871       The filename with the common suffix removed.
   2872     """
   2873     for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
   2874                    'inl.h', 'impl.h', 'internal.h'):
   2875         if (filename.endswith(suffix) and len(filename) > len(suffix)
   2876             and filename[-len(suffix) - 1] in ('-', '_')):
   2877             return filename[:-len(suffix) - 1]
   2878     return os.path.splitext(filename)[0]
   2879 
   2880 
   2881 def _classify_include(filename, include, is_system, include_state):
   2882     """Figures out what kind of header 'include' is.
   2883 
   2884     Args:
   2885       filename: The current file cpp_style is running over.
   2886       include: The path to a #included file.
   2887       is_system: True if the #include used <> rather than "".
   2888       include_state: An _IncludeState instance in which the headers are inserted.
   2889 
   2890     Returns:
   2891       One of the _XXX_HEADER constants.
   2892 
   2893     For example:
   2894       >>> _classify_include('foo.cpp', 'config.h', False)
   2895       _CONFIG_HEADER
   2896       >>> _classify_include('foo.cpp', 'foo.h', False)
   2897       _PRIMARY_HEADER
   2898       >>> _classify_include('foo.cpp', 'bar.h', False)
   2899       _OTHER_HEADER
   2900     """
   2901 
   2902     # If it is a system header we know it is classified as _OTHER_HEADER.
   2903     if is_system and not include.startswith('public/'):
   2904         return _OTHER_HEADER
   2905 
   2906     # If the include is named config.h then this is WebCore/config.h.
   2907     if include == "config.h":
   2908         return _CONFIG_HEADER
   2909 
   2910     # There cannot be primary includes in header files themselves. Only an
   2911     # include exactly matches the header filename will be is flagged as
   2912     # primary, so that it triggers the "don't include yourself" check.
   2913     if filename.endswith('.h') and filename != include:
   2914         return _OTHER_HEADER;
   2915 
   2916     # Qt's moc files do not follow the naming and ordering rules, so they should be skipped
   2917     if include.startswith('moc_') and include.endswith('.cpp'):
   2918         return _MOC_HEADER
   2919 
   2920     if include.endswith('.moc'):
   2921         return _MOC_HEADER
   2922 
   2923     # If the target file basename starts with the include we're checking
   2924     # then we consider it the primary header.
   2925     target_base = FileInfo(filename).base_name()
   2926     include_base = FileInfo(include).base_name()
   2927 
   2928     # If we haven't encountered a primary header, then be lenient in checking.
   2929     if not include_state.visited_primary_section():
   2930         if target_base.find(include_base) != -1:
   2931             return _PRIMARY_HEADER
   2932         # Qt private APIs use _p.h suffix.
   2933         if include_base.find(target_base) != -1 and include_base.endswith('_p'):
   2934             return _PRIMARY_HEADER
   2935 
   2936     # If we already encountered a primary header, perform a strict comparison.
   2937     # In case the two filename bases are the same then the above lenient check
   2938     # probably was a false positive.
   2939     elif include_state.visited_primary_section() and target_base == include_base:
   2940         if include == "ResourceHandleWin.h":
   2941             # FIXME: Thus far, we've only seen one example of these, but if we
   2942             # start to see more, please consider generalizing this check
   2943             # somehow.
   2944             return _OTHER_HEADER
   2945         return _PRIMARY_HEADER
   2946 
   2947     return _OTHER_HEADER
   2948 
   2949 
   2950 def _does_primary_header_exist(filename):
   2951     """Return a primary header file name for a file, or empty string
   2952     if the file is not source file or primary header does not exist.
   2953     """
   2954     fileinfo = FileInfo(filename)
   2955     if not fileinfo.is_source():
   2956         return False
   2957     primary_header = fileinfo.no_extension() + ".h"
   2958     return os.path.isfile(primary_header)
   2959 
   2960 
   2961 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
   2962     """Check rules that are applicable to #include lines.
   2963 
   2964     Strings on #include lines are NOT removed from elided line, to make
   2965     certain tasks easier. However, to prevent false positives, checks
   2966     applicable to #include lines in CheckLanguage must be put here.
   2967 
   2968     Args:
   2969       filename: The name of the current file.
   2970       file_extension: The current file extension, without the leading dot.
   2971       clean_lines: A CleansedLines instance containing the file.
   2972       line_number: The number of the line to check.
   2973       include_state: An _IncludeState instance in which the headers are inserted.
   2974       error: The function to call with any errors found.
   2975     """
   2976     # FIXME: For readability or as a possible optimization, consider
   2977     #        exiting early here by checking whether the "build/include"
   2978     #        category should be checked for the given filename.  This
   2979     #        may involve having the error handler classes expose a
   2980     #        should_check() method, in addition to the usual __call__
   2981     #        method.
   2982     line = clean_lines.lines[line_number]
   2983 
   2984     matched = _RE_PATTERN_INCLUDE.search(line)
   2985     if not matched:
   2986         return
   2987 
   2988     include = matched.group(2)
   2989     is_system = (matched.group(1) == '<')
   2990 
   2991     # Look for any of the stream classes that are part of standard C++.
   2992     if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   2993         error(line_number, 'readability/streams', 3,
   2994               'Streams are highly discouraged.')
   2995 
   2996     # Look for specific includes to fix.
   2997     if include.startswith('wtf/') and is_system:
   2998         error(line_number, 'build/include', 4,
   2999               'wtf includes should be "wtf/file.h" instead of <wtf/file.h>.')
   3000 
   3001     if filename.find('/chromium/') != -1 and include.startswith('cc/CC'):
   3002         error(line_number, 'build/include', 4,
   3003               'cc includes should be "CCFoo.h" instead of "cc/CCFoo.h".')
   3004 
   3005     duplicate_header = include in include_state
   3006     if duplicate_header:
   3007         error(line_number, 'build/include', 4,
   3008               '"%s" already included at %s:%s' %
   3009               (include, filename, include_state[include]))
   3010     else:
   3011         include_state[include] = line_number
   3012 
   3013     header_type = _classify_include(filename, include, is_system, include_state)
   3014     primary_header_exists = _does_primary_header_exist(filename)
   3015     include_state.header_types[line_number] = header_type
   3016 
   3017     # Only proceed if this isn't a duplicate header.
   3018     if duplicate_header:
   3019         return
   3020 
   3021     # We want to ensure that headers appear in the right order:
   3022     # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
   3023     # 2) for header files: alphabetically sorted
   3024     # The include_state object keeps track of the last type seen
   3025     # and complains if the header types are out of order or missing.
   3026     error_message = include_state.check_next_include_order(header_type,
   3027                                                            file_extension == "h",
   3028                                                            primary_header_exists)
   3029 
   3030     # Check to make sure we have a blank line after primary header.
   3031     if not error_message and header_type == _PRIMARY_HEADER:
   3032          next_line = clean_lines.raw_lines[line_number + 1]
   3033          if not is_blank_line(next_line):
   3034             error(line_number, 'build/include_order', 4,
   3035                   'You should add a blank line after implementation file\'s own header.')
   3036 
   3037     # Check to make sure all headers besides config.h and the primary header are
   3038     # alphabetically sorted. Skip Qt's moc files.
   3039     if not error_message and header_type == _OTHER_HEADER:
   3040          previous_line_number = line_number - 1;
   3041          previous_line = clean_lines.lines[previous_line_number]
   3042          previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   3043          while (not previous_match and previous_line_number > 0
   3044                 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
   3045             previous_line_number -= 1;
   3046             previous_line = clean_lines.lines[previous_line_number]
   3047             previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   3048          if previous_match:
   3049             previous_header_type = include_state.header_types[previous_line_number]
   3050             if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
   3051                 # This type of error is potentially a problem with this line or the previous one,
   3052                 # so if the error is filtered for one line, report it for the next. This is so that
   3053                 # we properly handle patches, for which only modified lines produce errors.
   3054                 if not error(line_number - 1, 'build/include_order', 4, 'Alphabetical sorting problem.'):
   3055                     error(line_number, 'build/include_order', 4, 'Alphabetical sorting problem.')
   3056 
   3057     if error_message:
   3058         if file_extension == 'h':
   3059             error(line_number, 'build/include_order', 4,
   3060                   '%s Should be: alphabetically sorted.' %
   3061                   error_message)
   3062         else:
   3063             error(line_number, 'build/include_order', 4,
   3064                   '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
   3065                   error_message)
   3066 
   3067 
   3068 def check_language(filename, clean_lines, line_number, file_extension, include_state,
   3069                    file_state, error):
   3070     """Checks rules from the 'C++ language rules' section of cppguide.html.
   3071 
   3072     Some of these rules are hard to test (function overloading, using
   3073     uint32 inappropriately), but we do the best we can.
   3074 
   3075     Args:
   3076       filename: The name of the current file.
   3077       clean_lines: A CleansedLines instance containing the file.
   3078       line_number: The number of the line to check.
   3079       file_extension: The extension (without the dot) of the filename.
   3080       include_state: An _IncludeState instance in which the headers are inserted.
   3081       file_state: A _FileState instance which maintains information about
   3082                   the state of things in the file.
   3083       error: The function to call with any errors found.
   3084     """
   3085     # If the line is empty or consists of entirely a comment, no need to
   3086     # check it.
   3087     line = clean_lines.elided[line_number]
   3088     if not line:
   3089         return
   3090 
   3091     matched = _RE_PATTERN_INCLUDE.search(line)
   3092     if matched:
   3093         check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
   3094         return
   3095 
   3096     # FIXME: figure out if they're using default arguments in fn proto.
   3097 
   3098     # Check to see if they're using an conversion function cast.
   3099     # I just try to capture the most common basic types, though there are more.
   3100     # Parameterless conversion functions, such as bool(), are allowed as they are
   3101     # probably a member operator declaration or default constructor.
   3102     matched = search(
   3103         r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   3104     if matched:
   3105         # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   3106         # where type may be float(), int(string), etc.  Without context they are
   3107         # virtually indistinguishable from int(x) casts.
   3108         if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
   3109             error(line_number, 'readability/casting', 4,
   3110                   'Using deprecated casting style.  '
   3111                   'Use static_cast<%s>(...) instead' %
   3112                   matched.group(1))
   3113 
   3114     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   3115                        'static_cast',
   3116                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
   3117                        error)
   3118     # This doesn't catch all cases.  Consider (const char * const)"hello".
   3119     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   3120                        'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   3121 
   3122     # In addition, we look for people taking the address of a cast.  This
   3123     # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   3124     # point where you think.
   3125     if search(
   3126         r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   3127         error(line_number, 'runtime/casting', 4,
   3128               ('Are you taking an address of a cast?  '
   3129                'This is dangerous: could be a temp var.  '
   3130                'Take the address before doing the cast, rather than after'))
   3131 
   3132     # Check for people declaring static/global STL strings at the top level.
   3133     # This is dangerous because the C++ language does not guarantee that
   3134     # globals with constructors are initialized before the first access.
   3135     matched = match(
   3136         r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   3137         line)
   3138     # Make sure it's not a function.
   3139     # Function template specialization looks like: "string foo<Type>(...".
   3140     # Class template definitions look like: "string Foo<Type>::Method(...".
   3141     if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   3142                              matched.group(3)):
   3143         error(line_number, 'runtime/string', 4,
   3144               'For a static/global string constant, use a C style string instead: '
   3145               '"%schar %s[]".' %
   3146               (matched.group(1), matched.group(2)))
   3147 
   3148     # Check that we're not using RTTI outside of testing code.
   3149     if search(r'\bdynamic_cast<', line):
   3150         error(line_number, 'runtime/rtti', 5,
   3151               'Do not use dynamic_cast<>.  If you need to cast within a class '
   3152               "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   3153               'RTTI.')
   3154 
   3155     if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   3156         error(line_number, 'runtime/init', 4,
   3157               'You seem to be initializing a member variable with itself.')
   3158 
   3159     if file_extension == 'h':
   3160         # FIXME: check that 1-arg constructors are explicit.
   3161         #        How to tell it's a constructor?
   3162         #        (handled in check_for_non_standard_constructs for now)
   3163         pass
   3164 
   3165     # Check if people are using the verboten C basic types.  The only exception
   3166     # we regularly allow is "unsigned short port" for port.
   3167     if search(r'\bshort port\b', line):
   3168         if not search(r'\bunsigned short port\b', line):
   3169             error(line_number, 'runtime/int', 4,
   3170                   'Use "unsigned short" for ports, not "short"')
   3171 
   3172     # When snprintf is used, the second argument shouldn't be a literal.
   3173     matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   3174     if matched:
   3175         error(line_number, 'runtime/printf', 3,
   3176               'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   3177               'to snprintf.' % (matched.group(1), matched.group(2)))
   3178 
   3179     # Check if some verboten C functions are being used.
   3180     if search(r'\bsprintf\b', line):
   3181         error(line_number, 'runtime/printf', 5,
   3182               'Never use sprintf.  Use snprintf instead.')
   3183     matched = search(r'\b(strcpy|strcat)\b', line)
   3184     if matched:
   3185         error(line_number, 'runtime/printf', 4,
   3186               'Almost always, snprintf is better than %s' % matched.group(1))
   3187 
   3188     if search(r'\bsscanf\b', line):
   3189         error(line_number, 'runtime/printf', 1,
   3190               'sscanf can be ok, but is slow and can overflow buffers.')
   3191 
   3192     # Check for suspicious usage of "if" like
   3193     # } if (a == b) {
   3194     if search(r'\}\s*if\s*\(', line):
   3195         error(line_number, 'readability/braces', 4,
   3196               'Did you mean "else if"? If not, start a new line for "if".')
   3197 
   3198     # Check for potential format string bugs like printf(foo).
   3199     # We constrain the pattern not to pick things like DocidForPrintf(foo).
   3200     # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   3201     matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
   3202     if matched:
   3203         error(line_number, 'runtime/printf', 4,
   3204               'Potential format string bug. Do %s("%%s", %s) instead.'
   3205               % (matched.group(1), matched.group(2)))
   3206 
   3207     # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   3208     matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   3209     if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
   3210         error(line_number, 'runtime/memset', 4,
   3211               'Did you mean "memset(%s, 0, %s)"?'
   3212               % (matched.group(1), matched.group(2)))
   3213 
   3214     # Detect variable-length arrays.
   3215     matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   3216     if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
   3217         matched.group(3).find(']') == -1):
   3218         # Split the size using space and arithmetic operators as delimiters.
   3219         # If any of the resulting tokens are not compile time constants then
   3220         # report the error.
   3221         tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3))
   3222         is_const = True
   3223         skip_next = False
   3224         for tok in tokens:
   3225             if skip_next:
   3226                 skip_next = False
   3227                 continue
   3228 
   3229             if search(r'sizeof\(.+\)', tok):
   3230                 continue
   3231             if search(r'arraysize\(\w+\)', tok):
   3232                 continue
   3233 
   3234             tok = tok.lstrip('(')
   3235             tok = tok.rstrip(')')
   3236             if not tok:
   3237                 continue
   3238             if match(r'\d+', tok):
   3239                 continue
   3240             if match(r'0[xX][0-9a-fA-F]+', tok):
   3241                 continue
   3242             if match(r'k[A-Z0-9]\w*', tok):
   3243                 continue
   3244             if match(r'(.+::)?k[A-Z0-9]\w*', tok):
   3245                 continue
   3246             if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
   3247                 continue
   3248             # A catch all for tricky sizeof cases, including 'sizeof expression',
   3249             # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   3250             # requires skipping the next token becasue we split on ' ' and '*'.
   3251             if tok.startswith('sizeof'):
   3252                 skip_next = True
   3253                 continue
   3254             is_const = False
   3255             break
   3256         if not is_const:
   3257             error(line_number, 'runtime/arrays', 1,
   3258                   'Do not use variable-length arrays.  Use an appropriately named '
   3259                   "('k' followed by CamelCase) compile-time constant for the size.")
   3260 
   3261     # Check for use of unnamed namespaces in header files.  Registration
   3262     # macros are typically OK, so we allow use of "namespace {" on lines
   3263     # that end with backslashes.
   3264     if (file_extension == 'h'
   3265         and search(r'\bnamespace\s*{', line)
   3266         and line[-1] != '\\'):
   3267         error(line_number, 'build/namespaces', 4,
   3268               'Do not use unnamed namespaces in header files.  See '
   3269               'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   3270               ' for more information.')
   3271 
   3272     # Check for plain bitfields declared without either "singed" or "unsigned".
   3273     # Most compilers treat such bitfields as signed, but there are still compilers like
   3274     # RVCT 4.0 that use unsigned by default.
   3275     matched = re.match(r'\s*((const|mutable)\s+)?(char|(short(\s+int)?)|int|long(\s+(long|int))?)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:\s*\d+\s*;', line)
   3276     if matched:
   3277         error(line_number, 'runtime/bitfields', 5,
   3278               'Please declare integral type bitfields with either signed or unsigned.')
   3279 
   3280     check_identifier_name_in_declaration(filename, line_number, line, file_state, error)
   3281 
   3282     # Check for unsigned int (should be just 'unsigned')
   3283     if search(r'\bunsigned int\b', line):
   3284         error(line_number, 'runtime/unsigned', 1,
   3285               'Omit int when using unsigned')
   3286 
   3287     # Check that we're not using static_cast<Text*>.
   3288     if search(r'\bstatic_cast<Text\*>', line):
   3289         error(line_number, 'readability/check', 4,
   3290               'Consider using toText helper function in WebCore/dom/Text.h '
   3291               'instead of static_cast<Text*>')
   3292 
   3293 def check_identifier_name_in_declaration(filename, line_number, line, file_state, error):
   3294     """Checks if identifier names contain any underscores.
   3295 
   3296     As identifiers in libraries we are using have a bunch of
   3297     underscores, we only warn about the declarations of identifiers
   3298     and don't check use of identifiers.
   3299 
   3300     Args:
   3301       filename: The name of the current file.
   3302       line_number: The number of the line to check.
   3303       line: The line of code to check.
   3304       file_state: A _FileState instance which maintains information about
   3305                   the state of things in the file.
   3306       error: The function to call with any errors found.
   3307     """
   3308     # We don't check a return statement.
   3309     if match(r'\s*(return|delete)\b', line):
   3310         return
   3311 
   3312     # Basically, a declaration is a type name followed by whitespaces
   3313     # followed by an identifier. The type name can be complicated
   3314     # due to type adjectives and templates. We remove them first to
   3315     # simplify the process to find declarations of identifiers.
   3316 
   3317     # Convert "long long", "long double", and "long long int" to
   3318     # simple types, but don't remove simple "long".
   3319     line = sub(r'long (long )?(?=long|double|int)', '', line)
   3320     # Convert unsigned/signed types to simple types, too.
   3321     line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line)
   3322     line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)
   3323 
   3324     # Remove "new" and "new (expr)" to simplify, too.
   3325     line = sub(r'new\s*(\([^)]*\))?', '', line)
   3326 
   3327     # Remove all template parameters by removing matching < and >.
   3328     # Loop until no templates are removed to remove nested templates.
   3329     while True:
   3330         line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
   3331         if not number_of_replacements:
   3332             break
   3333 
   3334     # Declarations of local variables can be in condition expressions
   3335     # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
   3336     # We remove the keywords and the first parenthesis.
   3337     #
   3338     # Declarations in "while", "if", and "switch" are different from
   3339     # other declarations in two aspects:
   3340     #
   3341     # - There can be only one declaration between the parentheses.
   3342     #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
   3343     # - The variable must be initialized.
   3344     #   (i.e., you cannot write "if (int i) {}")
   3345     #
   3346     # and we will need different treatments for them.
   3347     line = sub(r'^\s*for\s*\(', '', line)
   3348     line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)
   3349 
   3350     # Detect variable and functions.
   3351     type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
   3352     identifier_regexp = r'(?P<identifier>[\w:]+)'
   3353     maybe_bitfield_regexp = r'(:\s*\d+\s*)?'
   3354     character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
   3355     declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + maybe_bitfield_regexp + character_after_identifier_regexp
   3356     declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
   3357     is_function_arguments = False
   3358     number_of_identifiers = 0
   3359     while True:
   3360         # If we are seeing the first identifier or arguments of a
   3361         # function, there should be a type name before an identifier.
   3362         if not number_of_identifiers or is_function_arguments:
   3363             declaration_regexp = declaration_with_type_regexp
   3364         else:
   3365             declaration_regexp = declaration_without_type_regexp
   3366 
   3367         matched = match(declaration_regexp, line)
   3368         if not matched:
   3369             return
   3370         identifier = matched.group('identifier')
   3371         character_after_identifier = matched.group('character_after_identifier')
   3372 
   3373         # If we removed a non-for-control statement, the character after
   3374         # the identifier should be '='. With this rule, we can avoid
   3375         # warning for cases like "if (val & INT_MAX) {".
   3376         if control_statement and character_after_identifier != '=':
   3377             return
   3378 
   3379         is_function_arguments = is_function_arguments or character_after_identifier == '('
   3380 
   3381         # Remove "m_" and "s_" to allow them.
   3382         modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
   3383         if not file_state.is_objective_c() and modified_identifier.find('_') >= 0:
   3384             # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
   3385             if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0)
   3386                 and not (filename.find('gtk') >= 0 and modified_identifier.startswith('webkit_') >= 0)
   3387                 and not modified_identifier.startswith('tst_')
   3388                 and not modified_identifier.startswith('webkit_dom_object_')
   3389                 and not modified_identifier.startswith('webkit_soup')
   3390                 and not modified_identifier.startswith('NPN_')
   3391                 and not modified_identifier.startswith('NPP_')
   3392                 and not modified_identifier.startswith('NP_')
   3393                 and not modified_identifier.startswith('qt_')
   3394                 and not modified_identifier.startswith('_q_')
   3395                 and not modified_identifier.startswith('cairo_')
   3396                 and not modified_identifier.startswith('Ecore_')
   3397                 and not modified_identifier.startswith('Eina_')
   3398                 and not modified_identifier.startswith('Evas_')
   3399                 and not modified_identifier.startswith('Ewk_')
   3400                 and not modified_identifier.startswith('cti_')
   3401                 and not modified_identifier.find('::qt_') >= 0
   3402                 and not modified_identifier.find('::_q_') >= 0
   3403                 and not modified_identifier == "const_iterator"
   3404                 and not modified_identifier == "vm_throw"
   3405                 and not modified_identifier == "DFG_OPERATION"):
   3406                 error(line_number, 'readability/naming/underscores', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
   3407 
   3408         # Check for variables named 'l', these are too easy to confuse with '1' in some fonts
   3409         if modified_identifier == 'l':
   3410             error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")
   3411 
   3412         # There can be only one declaration in non-for-control statements.
   3413         if control_statement:
   3414             return
   3415         # We should continue checking if this is a function
   3416         # declaration because we need to check its arguments.
   3417         # Also, we need to check multiple declarations.
   3418         if character_after_identifier != '(' and character_after_identifier != ',':
   3419             return
   3420 
   3421         number_of_identifiers += 1
   3422         line = line[matched.end():]
   3423 
   3424 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
   3425                        error):
   3426     """Checks for a C-style cast by looking for the pattern.
   3427 
   3428     This also handles sizeof(type) warnings, due to similarity of content.
   3429 
   3430     Args:
   3431       line_number: The number of the line to check.
   3432       line: The line of code to check.
   3433       raw_line: The raw line of code to check, with comments.
   3434       cast_type: The string for the C++ cast to recommend.  This is either
   3435                  reinterpret_cast or static_cast, depending.
   3436       pattern: The regular expression used to find C-style casts.
   3437       error: The function to call with any errors found.
   3438     """
   3439     matched = search(pattern, line)
   3440     if not matched:
   3441         return
   3442 
   3443     # e.g., sizeof(int)
   3444     sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1])
   3445     if sizeof_match:
   3446         error(line_number, 'runtime/sizeof', 1,
   3447               'Using sizeof(type).  Use sizeof(varname) instead if possible')
   3448         return
   3449 
   3450     remainder = line[matched.end(0):]
   3451 
   3452     # The close paren is for function pointers as arguments to a function.
   3453     # eg, void foo(void (*bar)(int));
   3454     # The semicolon check is a more basic function check; also possibly a
   3455     # function pointer typedef.
   3456     # eg, void foo(int); or void foo(int) const;
   3457     # The equals check is for function pointer assignment.
   3458     # eg, void *(*foo)(int) = ...
   3459     #
   3460     # Right now, this will only catch cases where there's a single argument, and
   3461     # it's unnamed.  It should probably be expanded to check for multiple
   3462     # arguments with some unnamed.
   3463     function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder)
   3464     if function_match:
   3465         if (not function_match.group(3)
   3466             or function_match.group(3) == ';'
   3467             or raw_line.find('/*') < 0):
   3468             error(line_number, 'readability/function', 3,
   3469                   'All parameters should be named in a function')
   3470         return
   3471 
   3472     # At this point, all that should be left is actual casts.
   3473     error(line_number, 'readability/casting', 4,
   3474           'Using C-style cast.  Use %s<%s>(...) instead' %
   3475           (cast_type, matched.group(1)))
   3476 
   3477 
   3478 _HEADERS_CONTAINING_TEMPLATES = (
   3479     ('<deque>', ('deque',)),
   3480     ('<functional>', ('unary_function', 'binary_function',
   3481                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   3482                       'negate',
   3483                       'equal_to', 'not_equal_to', 'greater', 'less',
   3484                       'greater_equal', 'less_equal',
   3485                       'logical_and', 'logical_or', 'logical_not',
   3486                       'unary_negate', 'not1', 'binary_negate', 'not2',
   3487                       'bind1st', 'bind2nd',
   3488                       'pointer_to_unary_function',
   3489                       'pointer_to_binary_function',
   3490                       'ptr_fun',
   3491                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   3492                       'mem_fun_ref_t',
   3493                       'const_mem_fun_t', 'const_mem_fun1_t',
   3494                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   3495                       'mem_fun_ref',
   3496                      )),
   3497     ('<limits>', ('numeric_limits',)),
   3498     ('<list>', ('list',)),
   3499     ('<map>', ('map', 'multimap',)),
   3500     ('<memory>', ('allocator',)),
   3501     ('<queue>', ('queue', 'priority_queue',)),
   3502     ('<set>', ('set', 'multiset',)),
   3503     ('<stack>', ('stack',)),
   3504     ('<string>', ('char_traits', 'basic_string',)),
   3505     ('<utility>', ('pair',)),
   3506     ('<vector>', ('vector',)),
   3507 
   3508     # gcc extensions.
   3509     # Note: std::hash is their hash, ::hash is our hash
   3510     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   3511     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   3512     ('<slist>', ('slist',)),
   3513     )
   3514 
   3515 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
   3516     # We can trust with reasonable confidence that map gives us pair<>, too.
   3517     'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
   3518 }
   3519 
   3520 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   3521 
   3522 _re_pattern_algorithm_header = []
   3523 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   3524                   'transform'):
   3525     # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   3526     # type::max().
   3527     _re_pattern_algorithm_header.append(
   3528         (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   3529          _template,
   3530          '<algorithm>'))
   3531 
   3532 _re_pattern_templates = []
   3533 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   3534     for _template in _templates:
   3535         _re_pattern_templates.append(
   3536             (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   3537              _template + '<>',
   3538              _header))
   3539 
   3540 
   3541 def files_belong_to_same_module(filename_cpp, filename_h):
   3542     """Check if these two filenames belong to the same module.
   3543 
   3544     The concept of a 'module' here is a as follows:
   3545     foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
   3546     same 'module' if they are in the same directory.
   3547     some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   3548     to belong to the same module here.
   3549 
   3550     If the filename_cpp contains a longer path than the filename_h, for example,
   3551     '/absolute/path/to/base/sysinfo.cpp', and this file would include
   3552     'base/sysinfo.h', this function also produces the prefix needed to open the
   3553     header. This is used by the caller of this function to more robustly open the
   3554     header file. We don't have access to the real include paths in this context,
   3555     so we need this guesswork here.
   3556 
   3557     Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
   3558     according to this implementation. Because of this, this function gives
   3559     some false positives. This should be sufficiently rare in practice.
   3560 
   3561     Args:
   3562       filename_cpp: is the path for the .cpp file
   3563       filename_h: is the path for the header path
   3564 
   3565     Returns:
   3566       Tuple with a bool and a string:
   3567       bool: True if filename_cpp and filename_h belong to the same module.
   3568       string: the additional prefix needed to open the header file.
   3569     """
   3570 
   3571     if not filename_cpp.endswith('.cpp'):
   3572         return (False, '')
   3573     filename_cpp = filename_cpp[:-len('.cpp')]
   3574     if filename_cpp.endswith('_unittest'):
   3575         filename_cpp = filename_cpp[:-len('_unittest')]
   3576     elif filename_cpp.endswith('_test'):
   3577         filename_cpp = filename_cpp[:-len('_test')]
   3578     filename_cpp = filename_cpp.replace('/public/', '/')
   3579     filename_cpp = filename_cpp.replace('/internal/', '/')
   3580 
   3581     if not filename_h.endswith('.h'):
   3582         return (False, '')
   3583     filename_h = filename_h[:-len('.h')]
   3584     if filename_h.endswith('-inl'):
   3585         filename_h = filename_h[:-len('-inl')]
   3586     filename_h = filename_h.replace('/public/', '/')
   3587     filename_h = filename_h.replace('/internal/', '/')
   3588 
   3589     files_belong_to_same_module = filename_cpp.endswith(filename_h)
   3590     common_path = ''
   3591     if files_belong_to_same_module:
   3592         common_path = filename_cpp[:-len(filename_h)]
   3593     return files_belong_to_same_module, common_path
   3594 
   3595 
   3596 def update_include_state(filename, include_state, io=codecs):
   3597     """Fill up the include_state with new includes found from the file.
   3598 
   3599     Args:
   3600       filename: the name of the header to read.
   3601       include_state: an _IncludeState instance in which the headers are inserted.
   3602       io: The io factory to use to read the file. Provided for testability.
   3603 
   3604     Returns:
   3605       True if a header was succesfully added. False otherwise.
   3606     """
   3607     io = _unit_test_config.get(INCLUDE_IO_INJECTION_KEY, codecs)
   3608     header_file = None
   3609     try:
   3610         header_file = io.open(filename, 'r', 'utf8', 'replace')
   3611     except IOError:
   3612         return False
   3613     line_number = 0
   3614     for line in header_file:
   3615         line_number += 1
   3616         clean_line = cleanse_comments(line)
   3617         matched = _RE_PATTERN_INCLUDE.search(clean_line)
   3618         if matched:
   3619             include = matched.group(2)
   3620             # The value formatting is cute, but not really used right now.
   3621             # What matters here is that the key is in include_state.
   3622             include_state.setdefault(include, '%s:%d' % (filename, line_number))
   3623     return True
   3624 
   3625 
   3626 def check_for_include_what_you_use(filename, clean_lines, include_state, error):
   3627     """Reports for missing stl includes.
   3628 
   3629     This function will output warnings to make sure you are including the headers
   3630     necessary for the stl containers and functions that you use. We only give one
   3631     reason to include a header. For example, if you use both equal_to<> and
   3632     less<> in a .h file, only one (the latter in the file) of these will be
   3633     reported as a reason to include the <functional>.
   3634 
   3635     Args:
   3636       filename: The name of the current file.
   3637       clean_lines: A CleansedLines instance containing the file.
   3638       include_state: An _IncludeState instance.
   3639       error: The function to call with any errors found.
   3640     """
   3641     required = {}  # A map of header name to line_number and the template entity.
   3642         # Example of required: { '<functional>': (1219, 'less<>') }
   3643 
   3644     for line_number in xrange(clean_lines.num_lines()):
   3645         line = clean_lines.elided[line_number]
   3646         if not line or line[0] == '#':
   3647             continue
   3648 
   3649         # String is special -- it is a non-templatized type in STL.
   3650         if _RE_PATTERN_STRING.search(line):
   3651             required['<string>'] = (line_number, 'string')
   3652 
   3653         for pattern, template, header in _re_pattern_algorithm_header:
   3654             if pattern.search(line):
   3655                 required[header] = (line_number, template)
   3656 
   3657         # The following function is just a speed up, no semantics are changed.
   3658         if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   3659             continue
   3660 
   3661         for pattern, template, header in _re_pattern_templates:
   3662             if pattern.search(line):
   3663                 required[header] = (line_number, template)
   3664 
   3665     # The policy is that if you #include something in foo.h you don't need to
   3666     # include it again in foo.cpp. Here, we will look at possible includes.
   3667     # Let's copy the include_state so it is only messed up within this function.
   3668     include_state = include_state.copy()
   3669 
    # Did we find the header for this file (if any) and successfully load it?
   3671     header_found = False
   3672 
   3673     # Use the absolute path so that matching works properly.
   3674     abs_filename = os.path.abspath(filename)
   3675 
   3676     # For Emacs's flymake.
   3677     # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
   3678     # by flymake and that file name might end with '_flymake.cpp'. In that case,
   3679     # restore original file name here so that the corresponding header file can be
   3680     # found.
   3681     # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
   3682     # instead of 'foo_flymake.h'
   3683     abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename)
   3684 
   3685     # include_state is modified during iteration, so we iterate over a copy of
   3686     # the keys.
   3687     for header in include_state.keys():  #NOLINT
   3688         (same_module, common_path) = files_belong_to_same_module(abs_filename, header)
   3689         fullpath = common_path + header
   3690         if same_module and update_include_state(fullpath, include_state):
   3691             header_found = True
   3692 
   3693     # If we can't find the header file for a .cpp, assume it's because we don't
   3694     # know where to look. In that case we'll give up as we're not sure they
   3695     # didn't include it in the .h file.
   3696     # FIXME: Do a better job of finding .h files so we are confident that
   3697     #        not having the .h file means there isn't one.
   3698     if filename.endswith('.cpp') and not header_found:
   3699         return
   3700 
   3701     # All the lines have been processed, report the errors found.
   3702     for required_header_unstripped in required:
   3703         template = required[required_header_unstripped][1]
   3704         if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
   3705             headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
   3706             if [True for header in headers if header in include_state]:
   3707                 continue
   3708         if required_header_unstripped.strip('<>"') not in include_state:
   3709             error(required[required_header_unstripped][0],
   3710                   'build/include_what_you_use', 4,
   3711                   'Add #include ' + required_header_unstripped + ' for ' + template)
   3712 
   3713 
   3714 def process_line(filename, file_extension,
   3715                  clean_lines, line, include_state, function_state,
   3716                  class_state, file_state, enum_state, error):
   3717     """Processes a single line in the file.
   3718 
   3719     Args:
   3720       filename: Filename of the file that is being processed.
   3721       file_extension: The extension (dot not included) of the file.
   3722       clean_lines: An array of strings, each representing a line of the file,
   3723                    with comments stripped.
   3724       line: Number of line being processed.
   3725       include_state: An _IncludeState instance in which the headers are inserted.
   3726       function_state: A _FunctionState instance which counts function lines, etc.
   3727       class_state: A _ClassState instance which maintains information about
   3728                    the current stack of nested class declarations being parsed.
   3729       file_state: A _FileState instance which maintains information about
   3730                   the state of things in the file.
   3731       enum_state: A _EnumState instance which maintains an enum declaration
   3732                   state.
   3733       error: A callable to which errors are reported, which takes arguments:
   3734              line number, error level, and message
   3735 
   3736     """
   3737     raw_lines = clean_lines.raw_lines
   3738     detect_functions(clean_lines, line, function_state, error)
   3739     check_for_function_lengths(clean_lines, line, function_state, error)
   3740     if search(r'\bNOLINT\b', raw_lines[line]):  # ignore nolint lines
   3741         return
   3742     if match(r'\s*\b__asm\b', raw_lines[line]):  # Ignore asm lines as they format differently.
   3743         return
   3744     check_function_definition(filename, file_extension, clean_lines, line, function_state, error)
   3745     check_pass_ptr_usage(clean_lines, line, function_state, error)
   3746     check_for_leaky_patterns(clean_lines, line, function_state, error)
   3747     check_for_multiline_comments_and_strings(clean_lines, line, error)
   3748     check_style(clean_lines, line, file_extension, class_state, file_state, enum_state, error)
   3749     check_language(filename, clean_lines, line, file_extension, include_state,
   3750                    file_state, error)
   3751     check_for_non_standard_constructs(clean_lines, line, class_state, error)
   3752     check_posix_threading(clean_lines, line, error)
   3753     check_invalid_increment(clean_lines, line, error)
   3754     check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line, error)
   3755 
   3756 def _process_lines(filename, file_extension, lines, error, min_confidence):
   3757     """Performs lint checks and reports any errors to the given error function.
   3758 
   3759     Args:
   3760       filename: Filename of the file that is being processed.
   3761       file_extension: The extension (dot not included) of the file.
   3762       lines: An array of strings, each representing a line of the file, with the
   3763              last element being empty if the file is termined with a newline.
   3764       error: A callable to which errors are reported, which takes 4 arguments:
   3765     """
   3766     lines = (['// marker so line numbers and indices both start at 1'] + lines +
   3767              ['// marker so line numbers end in a known way'])
   3768 
   3769     include_state = _IncludeState()
   3770     function_state = _FunctionState(min_confidence)
   3771     class_state = _ClassState()
   3772 
   3773     check_for_copyright(lines, error)
   3774 
   3775     if file_extension == 'h':
   3776         check_for_header_guard(filename, lines, error)
   3777 
   3778     remove_multi_line_comments(lines, error)
   3779     clean_lines = CleansedLines(lines)
   3780     file_state = _FileState(clean_lines, file_extension)
   3781     enum_state = _EnumState()
   3782     for line in xrange(clean_lines.num_lines()):
   3783         process_line(filename, file_extension, clean_lines, line,
   3784                      include_state, function_state, class_state, file_state,
   3785                      enum_state, error)
   3786     class_state.check_finished(error)
   3787 
   3788     check_for_include_what_you_use(filename, clean_lines, include_state, error)
   3789 
   3790     # We check here rather than inside process_line so that we see raw
   3791     # lines rather than "cleaned" lines.
   3792     check_for_unicode_replacement_characters(lines, error)
   3793 
   3794     check_for_new_line_at_eof(lines, error)
   3795 
   3796 
   3797 class CppChecker(object):
   3798 
   3799     """Processes C++ lines for checking style."""
   3800 
   3801     # This list is used to--
   3802     #
   3803     # (1) generate an explicit list of all possible categories,
   3804     # (2) unit test that all checked categories have valid names, and
   3805     # (3) unit test that all categories are getting unit tested.
   3806     #
   3807     categories = set([
   3808         'build/class',
   3809         'build/deprecated',
   3810         'build/endif_comment',
   3811         'build/forward_decl',
   3812         'build/header_guard',
   3813         'build/include',
   3814         'build/include_order',
   3815         'build/include_what_you_use',
   3816         'build/namespaces',
   3817         'build/printf_format',
   3818         'build/storage_class',
   3819         'build/using_std',
   3820         'legal/copyright',
   3821         'readability/braces',
   3822         'readability/casting',
   3823         'readability/check',
   3824         'readability/comparison_to_zero',
   3825         'readability/constructors',
   3826         'readability/control_flow',
   3827         'readability/enum_casing',
   3828         'readability/fn_size',
   3829         'readability/function',
   3830         'readability/multiline_comment',
   3831         'readability/multiline_string',
   3832         'readability/parameter_name',
   3833         'readability/naming',
   3834         'readability/naming/underscores',
   3835         'readability/null',
   3836         'readability/pass_ptr',
   3837         'readability/streams',
   3838         'readability/todo',
   3839         'readability/utf8',
   3840         'readability/webkit_export',
   3841         'runtime/arrays',
   3842         'runtime/bitfields',
   3843         'runtime/casting',
   3844         'runtime/ctype_function',
   3845         'runtime/explicit',
   3846         'runtime/init',
   3847         'runtime/int',
   3848         'runtime/invalid_increment',
   3849         'runtime/leaky_pattern',
   3850         'runtime/max_min_macros',
   3851         'runtime/memset',
   3852         'runtime/printf',
   3853         'runtime/printf_format',
   3854         'runtime/references',
   3855         'runtime/rtti',
   3856         'runtime/sizeof',
   3857         'runtime/string',
   3858         'runtime/threadsafe_fn',
   3859         'runtime/unsigned',
   3860         'runtime/virtual',
   3861         'whitespace/blank_line',
   3862         'whitespace/braces',
   3863         'whitespace/comma',
   3864         'whitespace/comments',
   3865         'whitespace/declaration',
   3866         'whitespace/end_of_line',
   3867         'whitespace/ending_newline',
   3868         'whitespace/indent',
   3869         'whitespace/line_length',
   3870         'whitespace/newline',
   3871         'whitespace/operators',
   3872         'whitespace/parens',
   3873         'whitespace/semicolon',
   3874         'whitespace/tab',
   3875         'whitespace/todo',
   3876         ])
   3877 
   3878     def __init__(self, file_path, file_extension, handle_style_error,
   3879                  min_confidence):
   3880         """Create a CppChecker instance.
   3881 
   3882         Args:
   3883           file_extension: A string that is the file extension, without
   3884                           the leading dot.
   3885 
   3886         """
   3887         self.file_extension = file_extension
   3888         self.file_path = file_path
   3889         self.handle_style_error = handle_style_error
   3890         self.min_confidence = min_confidence
   3891 
   3892     # Useful for unit testing.
   3893     def __eq__(self, other):
   3894         """Return whether this CppChecker instance is equal to another."""
   3895         if self.file_extension != other.file_extension:
   3896             return False
   3897         if self.file_path != other.file_path:
   3898             return False
   3899         if self.handle_style_error != other.handle_style_error:
   3900             return False
   3901         if self.min_confidence != other.min_confidence:
   3902             return False
   3903 
   3904         return True
   3905 
   3906     # Useful for unit testing.
   3907     def __ne__(self, other):
   3908         # Python does not automatically deduce __ne__() from __eq__().
   3909         return not self.__eq__(other)
   3910 
   3911     def check(self, lines):
   3912         _process_lines(self.file_path, self.file_extension, lines,
   3913                        self.handle_style_error, self.min_confidence)
   3914 
   3915 
   3916 # FIXME: Remove this function (requires refactoring unit tests).
   3917 def process_file_data(filename, file_extension, lines, error, min_confidence, unit_test_config):
   3918     global _unit_test_config
   3919     _unit_test_config = unit_test_config
   3920     checker = CppChecker(filename, file_extension, error, min_confidence)
   3921     checker.check(lines)
   3922     _unit_test_config = {}
   3923