Home | History | Annotate | Download | only in processors
      1 #!/usr/bin/python
      2 # -*- coding: utf-8 -*-
      3 #
      4 # Copyright (C) 2009 Google Inc. All rights reserved.
      5 # Copyright (C) 2009 Torch Mobile Inc.
      6 # Copyright (C) 2009 Apple Inc. All rights reserved.
      7 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org)
      8 #
      9 # Redistribution and use in source and binary forms, with or without
     10 # modification, are permitted provided that the following conditions are
     11 # met:
     12 #
     13 #    * Redistributions of source code must retain the above copyright
     14 # notice, this list of conditions and the following disclaimer.
     15 #    * Redistributions in binary form must reproduce the above
     16 # copyright notice, this list of conditions and the following disclaimer
     17 # in the documentation and/or other materials provided with the
     18 # distribution.
     19 #    * Neither the name of Google Inc. nor the names of its
     20 # contributors may be used to endorse or promote products derived from
     21 # this software without specific prior written permission.
     22 #
     23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     27 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     34 
     35 # This is the modified version of Google's cpplint. The original code is
     36 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
     37 
     38 """Support for check-webkit-style."""
     39 
     40 import codecs
     41 import math  # for log
     42 import os
     43 import os.path
     44 import re
     45 import sre_compile
     46 import string
     47 import sys
     48 import unicodedata
     49 
     50 
# Headers that we consider STL headers.
# Kept in a frozenset so the collection is immutable and supports
# hash-based membership tests on the bare header name.
_STL_HEADERS = frozenset([
    'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
    'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    'utility', 'vector', 'vector.h',
    ])
     60 
     61 
# Non-STL C++ system headers.
# Kept in a frozenset so the collection is immutable and supports
# hash-based membership tests on the bare header name.
# NOTE: 'exception' is listed here and in _STL_HEADERS as well.
_CPP_HEADERS = frozenset([
    'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
    'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
    'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
    'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
    'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
    ])
     78 
     79 
# Assertion macros.  These are defined in base/logging.h and
# testing/base/gunit.h.  Note that the _M versions need to come first
# for substring matching to work.
_CHECK_MACROS = [
    'DCHECK', 'CHECK',
    'EXPECT_TRUE_M', 'EXPECT_TRUE',
    'ASSERT_TRUE_M', 'ASSERT_TRUE',
    'EXPECT_FALSE_M', 'EXPECT_FALSE',
    'ASSERT_FALSE_M', 'ASSERT_FALSE',
    ]

# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
# Shape: {macro name: {comparison operator: replacement macro name}}.
# Every macro starts with an empty operator table that the loops below fill.
_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])

# Macros asserting that a comparison is true map each operator to the
# macro named after that same comparison (e.g. '==' -> ..._EQ).
for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
                        ('>=', 'GE'), ('>', 'GT'),
                        ('<=', 'LE'), ('<', 'LT')]:
    _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement

# Macros asserting that a comparison is false map each operator to the
# macro for the opposite comparison (e.g. '==' -> ..._NE, '>=' -> ..._LT).
for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
                            ('>=', 'LT'), ('>', 'LE'),
                            ('<=', 'GT'), ('<', 'GE')]:
    _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    111 
    112 
# These constants define types of headers for use with
# _IncludeState.check_next_include_order().
_CONFIG_HEADER = 0   # reported as 'WebCore config.h'
_PRIMARY_HEADER = 1  # reported as 'header this file implements'
_OTHER_HEADER = 2    # reported as 'other header'
_MOC_HEADER = 3      # reported as 'moc file'; exempt from ordering checks
    119 
    120 
    121 # The regexp compilation caching is inlined in all regexp functions for
    122 # performance reasons; factoring it out into a separate function turns out
    123 # to be noticeably expensive.
    124 _regexp_compile_cache = {}
    125 
    126 
    127 def match(pattern, s):
    128     """Matches the string with the pattern, caching the compiled regexp."""
    129     if not pattern in _regexp_compile_cache:
    130         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    131     return _regexp_compile_cache[pattern].match(s)
    132 
    133 
    134 def search(pattern, s):
    135     """Searches the string for the pattern, caching the compiled regexp."""
    136     if not pattern in _regexp_compile_cache:
    137         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    138     return _regexp_compile_cache[pattern].search(s)
    139 
    140 
    141 def sub(pattern, replacement, s):
    142     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    143     if not pattern in _regexp_compile_cache:
    144         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    145     return _regexp_compile_cache[pattern].sub(replacement, s)
    146 
    147 
    148 def subn(pattern, replacement, s):
    149     """Substitutes occurrences of a pattern, caching the compiled regexp."""
    150     if not pattern in _regexp_compile_cache:
    151         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    152     return _regexp_compile_cache[pattern].subn(replacement, s)
    153 
    154 
    155 def up_to_unmatched_closing_paren(s):
    156     """Splits a string into two parts up to first unmatched ')'.
    157 
    158     Args:
    159       s: a string which is a substring of line after '('
    160       (e.g., "a == (b + c))").
    161 
    162     Returns:
    163       A pair of strings (prefix before first unmatched ')',
    164       reminder of s after first unmatched ')'), e.g.,
    165       up_to_unmatched_closing_paren("a == (b + c)) { ")
    166       returns "a == (b + c)", " {".
    167       Returns None, None if there is no unmatched ')'
    168 
    169     """
    170     i = 1
    171     for pos, c in enumerate(s):
    172       if c == '(':
    173         i += 1
    174       elif c == ')':
    175         i -= 1
    176         if i == 0:
    177           return s[:pos], s[pos + 1:]
    178     return None, None
    179 
    180 class _IncludeState(dict):
    181     """Tracks line numbers for includes, and the order in which includes appear.
    182 
    183     As a dict, an _IncludeState object serves as a mapping between include
    184     filename and line number on which that file was included.
    185 
    186     Call check_next_include_order() once for each header in the file, passing
    187     in the type constants defined above. Calls in an illegal order will
    188     raise an _IncludeError with an appropriate error message.
    189 
    190     """
    191     # self._section will move monotonically through this set. If it ever
    192     # needs to move backwards, check_next_include_order will raise an error.
    193     _INITIAL_SECTION = 0
    194     _CONFIG_SECTION = 1
    195     _PRIMARY_SECTION = 2
    196     _OTHER_SECTION = 3
    197 
    198     _TYPE_NAMES = {
    199         _CONFIG_HEADER: 'WebCore config.h',
    200         _PRIMARY_HEADER: 'header this file implements',
    201         _OTHER_HEADER: 'other header',
    202         _MOC_HEADER: 'moc file',
    203         }
    204     _SECTION_NAMES = {
    205         _INITIAL_SECTION: "... nothing.",
    206         _CONFIG_SECTION: "WebCore config.h.",
    207         _PRIMARY_SECTION: 'a header this file implements.',
    208         _OTHER_SECTION: 'other header.',
    209         }
    210 
    211     def __init__(self):
    212         dict.__init__(self)
    213         self._section = self._INITIAL_SECTION
    214         self._visited_primary_section = False
    215         self.header_types = dict();
    216 
    217     def visited_primary_section(self):
    218         return self._visited_primary_section
    219 
    220     def check_next_include_order(self, header_type, file_is_header):
    221         """Returns a non-empty error message if the next header is out of order.
    222 
    223         This function also updates the internal state to be ready to check
    224         the next include.
    225 
    226         Args:
    227           header_type: One of the _XXX_HEADER constants defined above.
    228           file_is_header: Whether the file that owns this _IncludeState is itself a header
    229 
    230         Returns:
    231           The empty string if the header is in the right order, or an
    232           error message describing what's wrong.
    233 
    234         """
    235         if header_type == _CONFIG_HEADER and file_is_header:
    236             return 'Header file should not contain WebCore config.h.'
    237         if header_type == _PRIMARY_HEADER and file_is_header:
    238             return 'Header file should not contain itself.'
    239         if header_type == _MOC_HEADER:
    240             return ''
    241 
    242         error_message = ''
    243         if self._section != self._OTHER_SECTION:
    244             before_error_message = ('Found %s before %s' %
    245                                     (self._TYPE_NAMES[header_type],
    246                                      self._SECTION_NAMES[self._section + 1]))
    247         after_error_message = ('Found %s after %s' %
    248                                 (self._TYPE_NAMES[header_type],
    249                                  self._SECTION_NAMES[self._section]))
    250 
    251         if header_type == _CONFIG_HEADER:
    252             if self._section >= self._CONFIG_SECTION:
    253                 error_message = after_error_message
    254             self._section = self._CONFIG_SECTION
    255         elif header_type == _PRIMARY_HEADER:
    256             if self._section >= self._PRIMARY_SECTION:
    257                 error_message = after_error_message
    258             elif self._section < self._CONFIG_SECTION:
    259                 error_message = before_error_message
    260             self._section = self._PRIMARY_SECTION
    261             self._visited_primary_section = True
    262         else:
    263             assert header_type == _OTHER_HEADER
    264             if not file_is_header and self._section < self._PRIMARY_SECTION:
    265                 error_message = before_error_message
    266             self._section = self._OTHER_SECTION
    267 
    268         return error_message
    269 
    270 
    271 class _FunctionState(object):
    272     """Tracks current function name and the number of lines in its body.
    273 
    274     Attributes:
    275       verbosity: The verbosity level to use while checking style.
    276 
    277     """
    278 
    279     _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    280     _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    281 
    282     def __init__(self, verbosity):
    283         self.verbosity = verbosity
    284         self.in_a_function = False
    285         self.lines_in_function = 0
    286         self.current_function = ''
    287 
    288     def begin(self, function_name):
    289         """Start analyzing function body.
    290 
    291         Args:
    292             function_name: The name of the function being tracked.
    293         """
    294         self.in_a_function = True
    295         self.lines_in_function = 0
    296         self.current_function = function_name
    297 
    298     def count(self):
    299         """Count line in current function body."""
    300         if self.in_a_function:
    301             self.lines_in_function += 1
    302 
    303     def check(self, error, line_number):
    304         """Report if too many lines in function body.
    305 
    306         Args:
    307           error: The function to call with any errors found.
    308           line_number: The number of the line to check.
    309         """
    310         if match(r'T(EST|est)', self.current_function):
    311             base_trigger = self._TEST_TRIGGER
    312         else:
    313             base_trigger = self._NORMAL_TRIGGER
    314         trigger = base_trigger * 2 ** self.verbosity
    315 
    316         if self.lines_in_function > trigger:
    317             error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    318             # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    319             if error_level > 5:
    320                 error_level = 5
    321             error(line_number, 'readability/fn_size', error_level,
    322                   'Small and focused functions are preferred:'
    323                   ' %s has %d non-comment lines'
    324                   ' (error triggered by exceeding %d lines).'  % (
    325                       self.current_function, self.lines_in_function, trigger))
    326 
    327     def end(self):
    328         """Stop analizing function body."""
    329         self.in_a_function = False
    330 
    331 
    332 class _IncludeError(Exception):
    333     """Indicates a problem with the include order in a file."""
    334     pass
    335 
    336 
    337 def is_c_or_objective_c(file_extension):
    338    """Return whether the file extension corresponds to C or Objective-C.
    339 
    340    Args:
    341      file_extension: The file extension without the leading dot.
    342 
    343    """
    344    return file_extension in ['c', 'm']
    345 
    346 
    347 class FileInfo:
    348     """Provides utility functions for filenames.
    349 
    350     FileInfo provides easy access to the components of a file's path
    351     relative to the project root.
    352     """
    353 
    354     def __init__(self, filename):
    355         self._filename = filename
    356 
    357     def full_name(self):
    358         """Make Windows paths like Unix."""
    359         return os.path.abspath(self._filename).replace('\\', '/')
    360 
    361     def repository_name(self):
    362         """Full name after removing the local path to the repository.
    363 
    364         If we have a real absolute path name here we can try to do something smart:
    365         detecting the root of the checkout and truncating /path/to/checkout from
    366         the name so that we get header guards that don't include things like
    367         "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    368         people on different computers who have checked the source out to different
    369         locations won't see bogus errors.
    370         """
    371         fullname = self.full_name()
    372 
    373         if os.path.exists(fullname):
    374             project_dir = os.path.dirname(fullname)
    375 
    376             if os.path.exists(os.path.join(project_dir, ".svn")):
    377                 # If there's a .svn file in the current directory, we
    378                 # recursively look up the directory tree for the top
    379                 # of the SVN checkout
    380                 root_dir = project_dir
    381                 one_up_dir = os.path.dirname(root_dir)
    382                 while os.path.exists(os.path.join(one_up_dir, ".svn")):
    383                     root_dir = os.path.dirname(root_dir)
    384                     one_up_dir = os.path.dirname(one_up_dir)
    385 
    386                 prefix = os.path.commonprefix([root_dir, project_dir])
    387                 return fullname[len(prefix) + 1:]
    388 
    389             # Not SVN? Try to find a git top level directory by
    390             # searching up from the current path.
    391             root_dir = os.path.dirname(fullname)
    392             while (root_dir != os.path.dirname(root_dir)
    393                    and not os.path.exists(os.path.join(root_dir, ".git"))):
    394                 root_dir = os.path.dirname(root_dir)
    395                 if os.path.exists(os.path.join(root_dir, ".git")):
    396                     prefix = os.path.commonprefix([root_dir, project_dir])
    397                     return fullname[len(prefix) + 1:]
    398 
    399         # Don't know what to do; header guard warnings may be wrong...
    400         return fullname
    401 
    402     def split(self):
    403         """Splits the file into the directory, basename, and extension.
    404 
    405         For 'chrome/browser/browser.cpp', Split() would
    406         return ('chrome/browser', 'browser', '.cpp')
    407 
    408         Returns:
    409           A tuple of (directory, basename, extension).
    410         """
    411 
    412         googlename = self.repository_name()
    413         project, rest = os.path.split(googlename)
    414         return (project,) + os.path.splitext(rest)
    415 
    416     def base_name(self):
    417         """File base name - text after the final slash, before the final period."""
    418         return self.split()[1]
    419 
    420     def extension(self):
    421         """File extension - text following the final period."""
    422         return self.split()[2]
    423 
    424     def no_extension(self):
    425         """File has no source file extension."""
    426         return '/'.join(self.split()[0:2])
    427 
    428     def is_source(self):
    429         """File has a source file extension."""
    430         return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    431 
    432 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to remove the spaces around a comment so that comments inside
# statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    453 
    454 
    455 def is_cpp_string(line):
    456     """Does line terminate so, that the next symbol is in string constant.
    457 
    458     This function does not consider single-line nor multi-line comments.
    459 
    460     Args:
    461       line: is a partial line of code starting from the 0..n.
    462 
    463     Returns:
    464       True, if next character appended to 'line' is inside a
    465       string constant.
    466     """
    467 
    468     line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    469     return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    470 
    471 
    472 def find_next_multi_line_comment_start(lines, line_index):
    473     """Find the beginning marker for a multiline comment."""
    474     while line_index < len(lines):
    475         if lines[line_index].strip().startswith('/*'):
    476             # Only return this marker if the comment goes beyond this line
    477             if lines[line_index].strip().find('*/', 2) < 0:
    478                 return line_index
    479         line_index += 1
    480     return len(lines)
    481 
    482 
    483 def find_next_multi_line_comment_end(lines, line_index):
    484     """We are inside a comment, find the end marker."""
    485     while line_index < len(lines):
    486         if lines[line_index].strip().endswith('*/'):
    487             return line_index
    488         line_index += 1
    489     return len(lines)
    490 
    491 
    492 def remove_multi_line_comments_from_range(lines, begin, end):
    493     """Clears a range of lines for multi-line comments."""
    494     # Having // dummy comments makes the lines non-empty, so we will not get
    495     # unnecessary blank line warnings later in the code.
    496     for i in range(begin, end):
    497         lines[i] = '// dummy'
    498 
    499 
    500 def remove_multi_line_comments(lines, error):
    501     """Removes multiline (c-style) comments from lines."""
    502     line_index = 0
    503     while line_index < len(lines):
    504         line_index_begin = find_next_multi_line_comment_start(lines, line_index)
    505         if line_index_begin >= len(lines):
    506             return
    507         line_index_end = find_next_multi_line_comment_end(lines, line_index_begin)
    508         if line_index_end >= len(lines):
    509             error(line_index_begin + 1, 'readability/multiline_comment', 5,
    510                   'Could not find end of multi-line comment')
    511             return
    512         remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1)
    513         line_index = line_index_end + 1
    514 
    515 
    516 def cleanse_comments(line):
    517     """Removes //-comments and single-line C-style /* */ comments.
    518 
    519     Args:
    520       line: A line of C++ source.
    521 
    522     Returns:
    523       The line with single-line comments removed.
    524     """
    525     comment_position = line.find('//')
    526     if comment_position != -1 and not is_cpp_string(line[:comment_position]):
    527         line = line[:comment_position]
    528     # get rid of /* ... */
    529     return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
    530 
    531 
    532 class CleansedLines(object):
    533     """Holds 3 copies of all lines with different preprocessing applied to them.
    534 
    535     1) elided member contains lines without strings and comments,
    536     2) lines member contains lines without comments, and
    537     3) raw member contains all the lines without processing.
    538     All these three members are of <type 'list'>, and of the same length.
    539     """
    540 
    541     def __init__(self, lines):
    542         self.elided = []
    543         self.lines = []
    544         self.raw_lines = lines
    545         self._num_lines = len(lines)
    546         for line_number in range(len(lines)):
    547             self.lines.append(cleanse_comments(lines[line_number]))
    548             elided = self.collapse_strings(lines[line_number])
    549             self.elided.append(cleanse_comments(elided))
    550 
    551     def num_lines(self):
    552         """Returns the number of lines represented."""
    553         return self._num_lines
    554 
    555     @staticmethod
    556     def collapse_strings(elided):
    557         """Collapses strings and chars on a line to simple "" or '' blocks.
    558 
    559         We nix strings first so we're not fooled by text like '"http://"'
    560 
    561         Args:
    562           elided: The line being processed.
    563 
    564         Returns:
    565           The line with collapsed strings.
    566         """
    567         if not _RE_PATTERN_INCLUDE.match(elided):
    568             # Remove escaped characters first to make quote/single quote collapsing
    569             # basic.  Things that look like escaped characters shouldn't occur
    570             # outside of strings and chars.
    571             elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
    572             elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
    573             elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
    574         return elided
    575 
    576 
    577 def close_expression(clean_lines, line_number, pos):
    578     """If input points to ( or { or [, finds the position that closes it.
    579 
    580     If lines[line_number][pos] points to a '(' or '{' or '[', finds the the
    581     line_number/pos that correspond to the closing of the expression.
    582 
    583     Args:
    584       clean_lines: A CleansedLines instance containing the file.
    585       line_number: The number of the line to check.
    586       pos: A position on the line.
    587 
    588     Returns:
    589       A tuple (line, line_number, pos) pointer *past* the closing brace, or
    590       (line, len(lines), -1) if we never find a close.  Note we ignore
    591       strings and comments when matching; and the line we return is the
    592       'cleansed' line at line_number.
    593     """
    594 
    595     line = clean_lines.elided[line_number]
    596     start_character = line[pos]
    597     if start_character not in '({[':
    598         return (line, clean_lines.num_lines(), -1)
    599     if start_character == '(':
    600         end_character = ')'
    601     if start_character == '[':
    602         end_character = ']'
    603     if start_character == '{':
    604         end_character = '}'
    605 
    606     num_open = line.count(start_character) - line.count(end_character)
    607     while line_number < clean_lines.num_lines() and num_open > 0:
    608         line_number += 1
    609         line = clean_lines.elided[line_number]
    610         num_open += line.count(start_character) - line.count(end_character)
    611     # OK, now find the end_character that actually got us back to even
    612     endpos = len(line)
    613     while num_open >= 0:
    614         endpos = line.rfind(')', 0, endpos)
    615         num_open -= 1                 # chopped off another )
    616     return (line, line_number, endpos + 1)
    617 
    618 
    619 def check_for_copyright(lines, error):
    620     """Logs an error if no Copyright message appears at the top of the file."""
    621 
    622     # We'll say it should occur by line 10. Don't forget there's a
    623     # dummy line at the front.
    624     for line in xrange(1, min(len(lines), 11)):
    625         if re.search(r'Copyright', lines[line], re.I):
    626             break
    627     else:                       # means no copyright line was found
    628         error(0, 'legal/copyright', 5,
    629               'No copyright message found.  '
    630               'You should have a line: "Copyright [year] <Copyright Owner>"')
    631 
    632 
    633 def get_header_guard_cpp_variable(filename):
    634     """Returns the CPP variable that should be used as a header guard.
    635 
    636     Args:
    637       filename: The name of a C++ header file.
    638 
    639     Returns:
    640       The CPP variable that should be used as a header guard in the
    641       named file.
    642 
    643     """
    644 
    645     return sub(r'[-.\s]', '_', os.path.basename(filename))
    646 
    647 
    648 def check_for_header_guard(filename, lines, error):
    649     """Checks that the file contains a header guard.
    650 
    651     Logs an error if no #ifndef header guard is present.  For other
    652     headers, checks that the full pathname is used.
    653 
    654     Args:
    655       filename: The name of the C++ header file.
    656       lines: An array of strings, each representing a line of the file.
    657       error: The function to call with any errors found.
    658     """
    659 
    660     cppvar = get_header_guard_cpp_variable(filename)
    661 
    662     ifndef = None
    663     ifndef_line_number = 0
    664     define = None
    665     for line_number, line in enumerate(lines):
    666         line_split = line.split()
    667         if len(line_split) >= 2:
    668             # find the first occurrence of #ifndef and #define, save arg
    669             if not ifndef and line_split[0] == '#ifndef':
    670                 # set ifndef to the header guard presented on the #ifndef line.
    671                 ifndef = line_split[1]
    672                 ifndef_line_number = line_number
    673             if not define and line_split[0] == '#define':
    674                 define = line_split[1]
    675             if define and ifndef:
    676                 break
    677 
    678     if not ifndef or not define or ifndef != define:
    679         error(0, 'build/header_guard', 5,
    680               'No #ifndef header guard found, suggested CPP variable is: %s' %
    681               cppvar)
    682         return
    683 
    684     # The guard should be File_h.
    685     if ifndef != cppvar:
    686         error(ifndef_line_number, 'build/header_guard', 5,
    687               '#ifndef header guard has wrong style, please use: %s' % cppvar)
    688 
    689 
    690 def check_for_unicode_replacement_characters(lines, error):
    691     """Logs an error for each line containing Unicode replacement characters.
    692 
    693     These indicate that either the file contained invalid UTF-8 (likely)
    694     or Unicode replacement characters (which it shouldn't).  Note that
    695     it's possible for this to throw off line numbering if the invalid
    696     UTF-8 occurred adjacent to a newline.
    697 
    698     Args:
    699       lines: An array of strings, each representing a line of the file.
    700       error: The function to call with any errors found.
    701     """
    702     for line_number, line in enumerate(lines):
    703         if u'\ufffd' in line:
    704             error(line_number, 'readability/utf8', 5,
    705                   'Line contains invalid UTF-8 (or Unicode replacement character).')
    706 
    707 
    708 def check_for_new_line_at_eof(lines, error):
    709     """Logs an error if there is no newline char at the end of the file.
    710 
    711     Args:
    712       lines: An array of strings, each representing a line of the file.
    713       error: The function to call with any errors found.
    714     """
    715 
    716     # The array lines() was created by adding two newlines to the
    717     # original file (go figure), then splitting on \n.
    718     # To verify that the file ends in \n, we just have to make sure the
    719     # last-but-two element of lines() exists and is empty.
    720     if len(lines) < 3 or lines[-2]:
    721         error(len(lines) - 2, 'whitespace/ending_newline', 5,
    722               'Could not find a newline character at the end of the file.')
    723 
    724 
    725 def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
    726     """Logs an error if we see /* ... */ or "..." that extend past one line.
    727 
    728     /* ... */ comments are legit inside macros, for one line.
    729     Otherwise, we prefer // comments, so it's ok to warn about the
    730     other.  Likewise, it's ok for strings to extend across multiple
    731     lines, as long as a line continuation character (backslash)
    732     terminates each line. Although not currently prohibited by the C++
    733     style guide, it's ugly and unnecessary. We don't do well with either
    734     in this lint program, so we warn about both.
    735 
    736     Args:
    737       clean_lines: A CleansedLines instance containing the file.
    738       line_number: The number of the line to check.
    739       error: The function to call with any errors found.
    740     """
    741     line = clean_lines.elided[line_number]
    742 
    743     # Remove all \\ (escaped backslashes) from the line. They are OK, and the
    744     # second (escaped) slash may trigger later \" detection erroneously.
    745     line = line.replace('\\\\', '')
    746 
    747     if line.count('/*') > line.count('*/'):
    748         error(line_number, 'readability/multiline_comment', 5,
    749               'Complex multi-line /*...*/-style comment found. '
    750               'Lint may give bogus warnings.  '
    751               'Consider replacing these with //-style comments, '
    752               'with #if 0...#endif, '
    753               'or with more clearly structured multi-line comments.')
    754 
    755     if (line.count('"') - line.count('\\"')) % 2:
    756         error(line_number, 'readability/multiline_string', 5,
    757               'Multi-line string ("...") found.  This lint script doesn\'t '
    758               'do well with such strings, and may give bogus warnings.  They\'re '
    759               'ugly and unnecessary, and you should use concatenation instead".')
    760 
    761 
    762 _THREADING_LIST = (
    763     ('asctime(', 'asctime_r('),
    764     ('ctime(', 'ctime_r('),
    765     ('getgrgid(', 'getgrgid_r('),
    766     ('getgrnam(', 'getgrnam_r('),
    767     ('getlogin(', 'getlogin_r('),
    768     ('getpwnam(', 'getpwnam_r('),
    769     ('getpwuid(', 'getpwuid_r('),
    770     ('gmtime(', 'gmtime_r('),
    771     ('localtime(', 'localtime_r('),
    772     ('rand(', 'rand_r('),
    773     ('readdir(', 'readdir_r('),
    774     ('strtok(', 'strtok_r('),
    775     ('ttyname(', 'ttyname_r('),
    776     )
    777 
    778 
    779 def check_posix_threading(clean_lines, line_number, error):
    780     """Checks for calls to thread-unsafe functions.
    781 
    782     Much code has been originally written without consideration of
    783     multi-threading. Also, engineers are relying on their old experience;
    784     they have learned posix before threading extensions were added. These
    785     tests guide the engineers to use thread-safe functions (when using
    786     posix directly).
    787 
    788     Args:
    789       clean_lines: A CleansedLines instance containing the file.
    790       line_number: The number of the line to check.
    791       error: The function to call with any errors found.
    792     """
    793     line = clean_lines.elided[line_number]
    794     for single_thread_function, multithread_safe_function in _THREADING_LIST:
    795         index = line.find(single_thread_function)
    796         # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
    797         if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
    798                                           and line[index - 1] not in ('_', '.', '>'))):
    799             error(line_number, 'runtime/threadsafe_fn', 2,
    800                   'Consider using ' + multithread_safe_function +
    801                   '...) instead of ' + single_thread_function +
    802                   '...) for improved thread safety.')
    803 
    804 
    805 # Matches invalid increment: *count++, which moves pointer instead of
    806 # incrementing a value.
    807 _RE_PATTERN_INVALID_INCREMENT = re.compile(
    808     r'^\s*\*\w+(\+\+|--);')
    809 
    810 
    811 def check_invalid_increment(clean_lines, line_number, error):
    812     """Checks for invalid increment *count++.
    813 
    814     For example following function:
    815     void increment_counter(int* count) {
    816         *count++;
    817     }
    818     is invalid, because it effectively does count++, moving pointer, and should
    819     be replaced with ++*count, (*count)++ or *count += 1.
    820 
    821     Args:
    822       clean_lines: A CleansedLines instance containing the file.
    823       line_number: The number of the line to check.
    824       error: The function to call with any errors found.
    825     """
    826     line = clean_lines.elided[line_number]
    827     if _RE_PATTERN_INVALID_INCREMENT.match(line):
    828         error(line_number, 'runtime/invalid_increment', 5,
    829               'Changing pointer instead of value (or unused value of operator*).')
    830 
    831 
    832 class _ClassInfo(object):
    833     """Stores information about a class."""
    834 
    835     def __init__(self, name, line_number):
    836         self.name = name
    837         self.line_number = line_number
    838         self.seen_open_brace = False
    839         self.is_derived = False
    840         self.virtual_method_line_number = None
    841         self.has_virtual_destructor = False
    842         self.brace_depth = 0
    843 
    844 
    845 class _ClassState(object):
    846     """Holds the current state of the parse relating to class declarations.
    847 
    848     It maintains a stack of _ClassInfos representing the parser's guess
    849     as to the current nesting of class declarations. The innermost class
    850     is at the top (back) of the stack. Typically, the stack will either
    851     be empty or have exactly one entry.
    852     """
    853 
    854     def __init__(self):
    855         self.classinfo_stack = []
    856 
    857     def check_finished(self, error):
    858         """Checks that all classes have been completely parsed.
    859 
    860         Call this when all lines in a file have been processed.
    861         Args:
    862           error: The function to call with any errors found.
    863         """
    864         if self.classinfo_stack:
    865             # Note: This test can result in false positives if #ifdef constructs
    866             # get in the way of brace matching. See the testBuildClass test in
    867             # cpp_style_unittest.py for an example of this.
    868             error(self.classinfo_stack[0].line_number, 'build/class', 5,
    869                   'Failed to find complete declaration of class %s' %
    870                   self.classinfo_stack[0].name)
    871 
    872 
    873 class _FileState(object):
    874     def __init__(self):
    875         self._did_inside_namespace_indent_warning = False
    876 
    877     def set_did_inside_namespace_indent_warning(self):
    878         self._did_inside_namespace_indent_warning = True
    879 
    880     def did_inside_namespace_indent_warning(self):
    881         return self._did_inside_namespace_indent_warning
    882 
def check_for_non_standard_constructs(clean_lines, line_number,
                                      class_state, error):
    """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.

    Complain about several constructs which gcc-2 accepts, but which are
    not standard C++.  Warning about these in lint is one way to ease the
    transition to new compilers.
    - put storage class first (e.g. "static const" instead of "const static").
    - "%lld" instead of "%qd" in printf-type functions.
    - "%1$d" is non-standard in printf-type functions.
    - "\%" is an undefined character escape sequence.
    - text after #endif is not allowed.
    - invalid inner-style forward declaration.
    - >? and <? operators, and their >?= and <?= cousins.
    - classes with virtual methods need virtual destructors (compiler warning
        available, but not turned on yet.)

    Additionally, check for constructor/destructor style violations as it
    is very convenient to do so while checking for gcc-2 compliance.

    This function is stateful across calls: it pushes a _ClassInfo onto
    class_state.classinfo_stack when a class/struct declaration starts,
    updates the entry on top of the stack as later lines are seen, and
    pops it when the class body closes (or when the declaration turns out
    to end in ';' before any opening brace).

    Args:
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      class_state: A _ClassState instance which maintains information about
                   the current stack of nested class declarations being parsed.
      error: A callable to which errors are reported.  It is invoked here
             with four positional arguments: line number, category string,
             an integer level, and message.
    """

    # Remove comments from the line, but leave in strings for now.
    line = clean_lines.lines[line_number]

    if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
        error(line_number, 'runtime/printf_format', 3,
              '%q in format strings is deprecated.  Use %ll instead.')

    if search(r'printf\s*\(.*".*%\d+\$', line):
        error(line_number, 'runtime/printf_format', 2,
              '%N$ formats are unconventional.  Try rewriting to avoid them.')

    # Remove escaped backslashes before looking for undefined escapes.
    line = line.replace('\\\\', '')

    # A quote followed, anywhere later on the line, by a backslash and one
    # of % [ ( {.
    if search(r'("|\').*\\(%|\[|\(|{)', line):
        error(line_number, 'build/printf_format', 3,
              '%, [, (, and { are undefined character escapes.  Unescape them.')

    # For the rest, work with both comments and strings removed.
    line = clean_lines.elided[line_number]

    # A type or qualifier keyword directly followed by a storage class.
    if search(r'\b(const|volatile|void|char|short|int|long'
              r'|float|double|signed|unsigned'
              r'|schar|u?int8|u?int16|u?int32|u?int64)'
              r'\s+(auto|register|static|extern|typedef)\b',
              line):
        error(line_number, 'build/storage_class', 5,
              'Storage class (static, extern, typedef, etc) should be first.')

    if match(r'\s*#\s*endif\s*[^/\s]+', line):
        error(line_number, 'build/endif_comment', 5,
              'Uncommented text after #endif is non-standard.  Use a comment.')

    # "class Outer::Inner;" -- a forward declaration with a qualified name.
    if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
        error(line_number, 'build/forward_decl', 5,
              'Inner-style forward declarations are invalid.  Remove this line.')

    if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
        error(line_number, 'build/deprecated', 3,
              '>? and <? (max and min) operators are non-standard and deprecated.')

    # Track class entry and exit, and attempt to find cases within the
    # class declaration that don't meet the C++ style
    # guidelines. Tracking is very dependent on the code matching Google
    # style guidelines, but it seems to perform well enough in testing
    # to be a worthwhile addition to the checks.
    classinfo_stack = class_state.classinfo_stack
    # Look for a class declaration
    class_decl_match = match(
        r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
    if class_decl_match:
        # group(3) is the (possibly ::-qualified) class name.
        classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))

    # Everything else in this function uses the top of the stack if it's
    # not empty.
    if not classinfo_stack:
        return

    classinfo = classinfo_stack[-1]

    # If the opening brace hasn't been seen look for it and also
    # parent class declarations.
    if not classinfo.seen_open_brace:
        # If the line has a ';' in it, assume it's a forward declaration or
        # a single-line class declaration, which we won't process.
        if line.find(';') != -1:
            classinfo_stack.pop()
            return
        classinfo.seen_open_brace = (line.find('{') != -1)
        # Look for a bare ':'
        # (a ':' that is not part of '::'), which is taken to introduce a
        # base-class list.
        if search('(^|[^:]):($|[^:])', line):
            classinfo.is_derived = True
        if not classinfo.seen_open_brace:
            return  # Everything else in this function is for after open brace

    # The class may have been declared with namespace or classname qualifiers.
    # The constructor and destructor will not have those qualifiers.
    base_classname = classinfo.name.split('::')[-1]

    # Look for single-argument constructors that aren't marked explicit.
    # Technically a valid construct, but against style.
    # The pattern requires leading whitespace directly before the class
    # name and a parameter list without ',' or nested parentheses.
    args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
                 % re.escape(base_classname),
                 line)
    # A lone 'void' argument and the copy-constructor forms 'Name&' /
    # 'const Name&' are exempt from the warning.
    if (args
        and args.group(1) != 'void'
        and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
                      args.group(1).strip())):
        error(line_number, 'runtime/explicit', 5,
              'Single-argument constructors should be marked explicit.')

    # Look for methods declared virtual.
    # Every such line overwrites virtual_method_line_number, so the value
    # reported later is that of the last 'virtual' line seen in the class.
    if search(r'\bvirtual\b', line):
        classinfo.virtual_method_line_number = line_number
        # Only look for a destructor declaration on the same line. It would
        # be extremely unlikely for the destructor declaration to occupy
        # more than one line.
        # NOTE(review): unlike the constructor patterns above, base_classname
        # is not passed through re.escape() here.  That is harmless as long
        # as names come from the \w+ class-declaration pattern above.
        if search(r'~%s\s*\(' % base_classname, line):
            classinfo.has_virtual_destructor = True

    # Look for class end.
    # brace_depth is the running balance of '{' and '}' since the class's
    # opening brace; the class is considered closed once it drops to zero
    # or below.
    brace_depth = classinfo.brace_depth
    brace_depth = brace_depth + line.count('{') - line.count('}')
    if brace_depth <= 0:
        classinfo = classinfo_stack.pop()
        # Try to detect missing virtual destructor declarations.
        # For now, only warn if a non-derived class with virtual methods lacks
        # a virtual destructor. This is to make it less likely that people will
        # declare derived virtual destructors without declaring the base
        # destructor virtual.
        if ((classinfo.virtual_method_line_number is not None)
            and (not classinfo.has_virtual_destructor)
            and (not classinfo.is_derived)):  # Only warn for base classes
            error(classinfo.line_number, 'runtime/virtual', 4,
                  'The class %s probably needs a virtual destructor due to '
                  'having virtual method(s), one declared at line %d.'
                  % (classinfo.name, classinfo.virtual_method_line_number))
    else:
        # Still inside the class body: remember the depth for the next line.
        classinfo.brace_depth = brace_depth
   1031 
   1032 
   1033 def check_spacing_for_function_call(line, line_number, error):
   1034     """Checks for the correctness of various spacing around function calls.
   1035 
   1036     Args:
   1037       line: The text of the line to check.
   1038       line_number: The number of the line to check.
   1039       error: The function to call with any errors found.
   1040     """
   1041 
   1042     # Since function calls often occur inside if/for/foreach/while/switch
   1043     # expressions - which have their own, more liberal conventions - we
   1044     # first see if we should be looking inside such an expression for a
   1045     # function call, to which we can apply more strict standards.
   1046     function_call = line    # if there's no control flow construct, look at whole line
   1047     for pattern in (r'\bif\s*\((.*)\)\s*{',
   1048                     r'\bfor\s*\((.*)\)\s*{',
   1049                     r'\bforeach\s*\((.*)\)\s*{',
   1050                     r'\bwhile\s*\((.*)\)\s*[{;]',
   1051                     r'\bswitch\s*\((.*)\)\s*{'):
   1052         matched = search(pattern, line)
   1053         if matched:
   1054             function_call = matched.group(1)    # look inside the parens for function calls
   1055             break
   1056 
   1057     # Except in if/for/foreach/while/switch, there should never be space
   1058     # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1059     # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1060     # a space before a ( when it's a function argument.  I assume it's a
   1061     # function argument when the char before the whitespace is legal in
   1062     # a function name (alnum + _) and we're not starting a macro. Also ignore
   1063     # pointers and references to arrays and functions coz they're too tricky:
   1064     # we use a very simple way to recognize these:
   1065     # " (something)(maybe-something)" or
   1066     # " (something)(maybe-something," or
   1067     # " (something)[something]"
   1068     # Note that we assume the contents of [] to be short enough that
   1069     # they'll never need to wrap.
   1070     if (  # Ignore control structures.
   1071         not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
   1072         # Ignore pointers/references to functions.
   1073         and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
   1074         # Ignore pointers/references to arrays.
   1075         and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
   1076         if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):      # a ( used for a fn call
   1077             error(line_number, 'whitespace/parens', 4,
   1078                   'Extra space after ( in function call')
   1079         elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
   1080             error(line_number, 'whitespace/parens', 2,
   1081                   'Extra space after (')
   1082         if (search(r'\w\s+\(', function_call)
   1083             and not search(r'#\s*define|typedef', function_call)):
   1084             error(line_number, 'whitespace/parens', 4,
   1085                   'Extra space before ( in function call')
   1086         # If the ) is followed only by a newline or a { + newline, assume it's
   1087         # part of a control statement (if/while/etc), and don't complain
   1088         if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
   1089             error(line_number, 'whitespace/parens', 2,
   1090                   'Extra space before )')
   1091 
   1092 
   1093 def is_blank_line(line):
   1094     """Returns true if the given line is blank.
   1095 
   1096     We consider a line to be blank if the line is empty or consists of
   1097     only white spaces.
   1098 
   1099     Args:
   1100       line: A line of a string.
   1101 
   1102     Returns:
   1103       True, if the given line is blank.
   1104     """
   1105     return not line or line.isspace()
   1106 
   1107 
   1108 def check_for_function_lengths(clean_lines, line_number, function_state, error):
   1109     """Reports for long function bodies.
   1110 
   1111     For an overview why this is done, see:
   1112     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1113 
   1114     Uses a simplistic algorithm assuming other style guidelines
   1115     (especially spacing) are followed.
   1116     Only checks unindented functions, so class members are unchecked.
   1117     Trivial bodies are unchecked, so constructors with huge initializer lists
   1118     may be missed.
   1119     Blank/comment lines are not counted so as to avoid encouraging the removal
   1120     of vertical space and commments just to get through a lint check.
   1121     NOLINT *on the last line of a function* disables this check.
   1122 
   1123     Args:
   1124       clean_lines: A CleansedLines instance containing the file.
   1125       line_number: The number of the line to check.
   1126       function_state: Current function name and lines in body so far.
   1127       error: The function to call with any errors found.
   1128     """
   1129     lines = clean_lines.lines
   1130     line = lines[line_number]
   1131     raw = clean_lines.raw_lines
   1132     raw_line = raw[line_number]
   1133     joined_line = ''
   1134 
   1135     starting_func = False
   1136     regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
   1137     match_result = match(regexp, line)
   1138     if match_result:
   1139         # If the name is all caps and underscores, figure it's a macro and
   1140         # ignore it, unless it's TEST or TEST_F.
   1141         function_name = match_result.group(1).split()[-1]
   1142         if function_name == 'TEST' or function_name == 'TEST_F' or (not match(r'[A-Z_]+$', function_name)):
   1143             starting_func = True
   1144 
   1145     if starting_func:
   1146         body_found = False
   1147         for start_line_number in xrange(line_number, clean_lines.num_lines()):
   1148             start_line = lines[start_line_number]
   1149             joined_line += ' ' + start_line.lstrip()
   1150             if search(r'(;|})', start_line):  # Declarations and trivial functions
   1151                 body_found = True
   1152                 break                              # ... ignore
   1153             if search(r'{', start_line):
   1154                 body_found = True
   1155                 function = search(r'((\w|:)*)\(', line).group(1)
   1156                 if match(r'TEST', function):    # Handle TEST... macros
   1157                     parameter_regexp = search(r'(\(.*\))', joined_line)
   1158                     if parameter_regexp:             # Ignore bad syntax
   1159                         function += parameter_regexp.group(1)
   1160                 else:
   1161                     function += '()'
   1162                 function_state.begin(function)
   1163                 break
   1164         if not body_found:
   1165             # No body for the function (or evidence of a non-function) was found.
   1166             error(line_number, 'readability/fn_size', 5,
   1167                   'Lint failed to find start of function body.')
   1168     elif match(r'^\}\s*$', line):  # function end
   1169         if not search(r'\bNOLINT\b', raw_line):
   1170             function_state.check(error, line_number)
   1171         function_state.end()
   1172     elif not match(r'^\s*$', line):
   1173         function_state.count()  # Count non-blank/non-comment lines.
   1174 
   1175 
   1176 def check_spacing(file_extension, clean_lines, line_number, error):
   1177     """Checks for the correctness of various spacing issues in the code.
   1178 
   1179     Things we check for: spaces around operators, spaces after
   1180     if/for/while/switch, no spaces around parens in function calls, two
   1181     spaces between code and comment, don't start a block with a blank
   1182     line, don't end a function with a blank line, don't have too many
   1183     blank lines in a row.
   1184 
   1185     Args:
   1186       file_extension: The current file extension, without the leading dot.
   1187       clean_lines: A CleansedLines instance containing the file.
   1188       line_number: The number of the line to check.
   1189       error: The function to call with any errors found.
   1190     """
   1191 
   1192     raw = clean_lines.raw_lines
   1193     line = raw[line_number]
   1194 
   1195     # Before nixing comments, check if the line is blank for no good
   1196     # reason.  This includes the first line after a block is opened, and
   1197     # blank lines at the end of a function (ie, right before a line like '}').
   1198     if is_blank_line(line):
   1199         elided = clean_lines.elided
   1200         previous_line = elided[line_number - 1]
   1201         previous_brace = previous_line.rfind('{')
   1202         # FIXME: Don't complain if line before blank line, and line after,
   1203         #        both start with alnums and are indented the same amount.
   1204         #        This ignores whitespace at the start of a namespace block
   1205         #        because those are not usually indented.
   1206         if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
   1207             and previous_line[:previous_brace].find('namespace') == -1):
   1208             # OK, we have a blank line at the start of a code block.  Before we
   1209             # complain, we check if it is an exception to the rule: The previous
   1210             # non-empty line has the parameters of a function header that are indented
   1211             # 4 spaces (because they did not fit in a 80 column line when placed on
   1212             # the same line as the function name).  We also check for the case where
   1213             # the previous line is indented 6 spaces, which may happen when the
   1214             # initializers of a constructor do not fit into a 80 column line.
   1215             exception = False
   1216             if match(r' {6}\w', previous_line):  # Initializer list?
   1217                 # We are looking for the opening column of initializer list, which
   1218                 # should be indented 4 spaces to cause 6 space indentation afterwards.
   1219                 search_position = line_number - 2
   1220                 while (search_position >= 0
   1221                        and match(r' {6}\w', elided[search_position])):
   1222                     search_position -= 1
   1223                 exception = (search_position >= 0
   1224                              and elided[search_position][:5] == '    :')
   1225             else:
   1226                 # Search for the function arguments or an initializer list.  We use a
   1227                 # simple heuristic here: If the line is indented 4 spaces; and we have a
   1228                 # closing paren, without the opening paren, followed by an opening brace
   1229                 # or colon (for initializer lists) we assume that it is the last line of
   1230                 # a function header.  If we have a colon indented 4 spaces, it is an
   1231                 # initializer list.
   1232                 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   1233                                    previous_line)
   1234                              or match(r' {4}:', previous_line))
   1235 
   1236             if not exception:
   1237                 error(line_number, 'whitespace/blank_line', 2,
   1238                       'Blank line at the start of a code block.  Is this needed?')
   1239         # This doesn't ignore whitespace at the end of a namespace block
   1240         # because that is too hard without pairing open/close braces;
   1241         # however, a special exception is made for namespace closing
   1242         # brackets which have a comment containing "namespace".
   1243         #
   1244         # Also, ignore blank lines at the end of a block in a long if-else
   1245         # chain, like this:
   1246         #   if (condition1) {
   1247         #     // Something followed by a blank line
   1248         #
   1249         #   } else if (condition2) {
   1250         #     // Something else
   1251         #   }
   1252         if line_number + 1 < clean_lines.num_lines():
   1253             next_line = raw[line_number + 1]
   1254             if (next_line
   1255                 and match(r'\s*}', next_line)
   1256                 and next_line.find('namespace') == -1
   1257                 and next_line.find('} else ') == -1):
   1258                 error(line_number, 'whitespace/blank_line', 3,
   1259                       'Blank line at the end of a code block.  Is this needed?')
   1260 
   1261     # Next, we complain if there's a comment too near the text
   1262     comment_position = line.find('//')
   1263     if comment_position != -1:
   1264         # Check if the // may be in quotes.  If so, ignore it
   1265         # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   1266         if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
   1267             # Allow one space before end of line comment.
   1268             if (not match(r'^\s*$', line[:comment_position])
   1269                 and (comment_position >= 1
   1270                 and ((line[comment_position - 1] not in string.whitespace)
   1271                      or (comment_position >= 2
   1272                          and line[comment_position - 2] in string.whitespace)))):
   1273                 error(line_number, 'whitespace/comments', 5,
   1274                       'One space before end of line comments')
   1275             # There should always be a space between the // and the comment
   1276             commentend = comment_position + 2
   1277             if commentend < len(line) and not line[commentend] == ' ':
   1278                 # but some lines are exceptions -- e.g. if they're big
   1279                 # comment delimiters like:
   1280                 # //----------------------------------------------------------
   1281                 # or they begin with multiple slashes followed by a space:
   1282                 # //////// Header comment
   1283                 matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
   1284                            or search(r'^/+ ', line[commentend:]))
   1285                 if not matched:
   1286                     error(line_number, 'whitespace/comments', 4,
   1287                           'Should have a space between // and comment')
   1288 
   1289     line = clean_lines.elided[line_number]  # get rid of comments and strings
   1290 
   1291     # Don't try to do spacing checks for operator methods
   1292     line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
   1293     # Don't try to do spacing checks for #include or #import statements at
   1294     # minimum because it messes up checks for spacing around /
   1295     if match(r'\s*#\s*(?:include|import)', line):
   1296         return
   1297     if search(r'[\w.]=[\w.]', line):
   1298         error(line_number, 'whitespace/operators', 4,
   1299               'Missing spaces around =')
   1300 
   1301     # FIXME: It's not ok to have spaces around binary operators like .
   1302 
   1303     # You should always have whitespace around binary operators.
   1304     # Alas, we can't test < or > because they're legitimately used sans spaces
   1305     # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
   1306     # only if it's not template params list spilling into the next line.
   1307     matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
   1308     if not matched:
   1309         # Note that while it seems that the '<[^<]*' term in the following
   1310         # regexp could be simplified to '<.*', which would indeed match
   1311         # the same class of strings, the [^<] means that searching for the
   1312         # regexp takes linear rather than quadratic time.
   1313         if not search(r'<[^<]*,\s*$', line):  # template params spill
   1314             matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
   1315     if matched:
   1316         error(line_number, 'whitespace/operators', 3,
   1317               'Missing spaces around %s' % matched.group(1))
   1318 
   1319     # There shouldn't be space around unary operators
   1320     matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   1321     if matched:
   1322         error(line_number, 'whitespace/operators', 4,
   1323               'Extra space for operator %s' % matched.group(1))
   1324 
   1325     # A pet peeve of mine: no spaces after an if, while, switch, or for
   1326     matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
   1327     if matched:
   1328         error(line_number, 'whitespace/parens', 5,
   1329               'Missing space before ( in %s' % matched.group(1))
   1330 
   1331     # For if/for/foreach/while/switch, the left and right parens should be
   1332     # consistent about how many spaces are inside the parens, and
   1333     # there should either be zero or one spaces inside the parens.
   1334     # We don't want: "if ( foo)" or "if ( foo   )".
   1335     # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   1336     matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<reminder>.*)$', line)
   1337     if matched:
   1338         statement = matched.group('statement')
   1339         condition, rest = up_to_unmatched_closing_paren(matched.group('reminder'))
   1340         if condition is not None:
   1341             condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
   1342             if condition_match:
   1343                 n_leading = len(condition_match.group('leading'))
   1344                 n_trailing = len(condition_match.group('trailing'))
   1345                 if n_leading != n_trailing:
   1346                     for_exception = statement == 'for' and (
   1347                         (condition.startswith(' ;') and n_trailing == 0) or
   1348                         (condition.endswith('; ')   and n_leading == 0))
   1349                     if not for_exception:
   1350                         error(line_number, 'whitespace/parens', 5,
   1351                               'Mismatching spaces inside () in %s' % statement)
   1352                 if n_leading > 1:
   1353                     error(line_number, 'whitespace/parens', 5,
   1354                           'Should have zero or one spaces inside ( and ) in %s' %
   1355                           statement)
   1356 
   1357             # Do not check for more than one command in macros
   1358             in_macro = match(r'\s*#define', line)
   1359             if not in_macro and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
   1360                 error(line_number, 'whitespace/parens', 4,
   1361                       'More than one command on the same line in %s' % statement)
   1362 
   1363     # You should always have a space after a comma (either as fn arg or operator)
   1364     if search(r',[^\s]', line):
   1365         error(line_number, 'whitespace/comma', 3,
   1366               'Missing space after ,')
   1367 
   1368     if file_extension == 'cpp':
   1369         # C++ should have the & or * beside the type not the variable name.
   1370         matched = match(r'\s*\w+(?<!\breturn)\s+(?P<pointer_operator>\*|\&)\w+', line)
   1371         if matched:
   1372             error(line_number, 'whitespace/declaration', 3,
   1373                   'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))
   1374 
   1375     elif file_extension == 'c':
   1376         # C Pointer declaration should have the * beside the variable not the type name.
   1377         matched = search(r'^\s*\w+\*\s+\w+', line)
   1378         if matched:
   1379             error(line_number, 'whitespace/declaration', 3,
   1380                   'Declaration has space between * and variable name in %s' % matched.group(0).strip())
   1381 
   1382     # Next we will look for issues with function calls.
   1383     check_spacing_for_function_call(line, line_number, error)
   1384 
   1385     # Except after an opening paren, you should have spaces before your braces.
   1386     # And since you should never have braces at the beginning of a line, this is
   1387     # an easy test.
   1388     if search(r'[^ ({]{', line):
   1389         error(line_number, 'whitespace/braces', 5,
   1390               'Missing space before {')
   1391 
   1392     # Make sure '} else {' has spaces.
   1393     if search(r'}else', line):
   1394         error(line_number, 'whitespace/braces', 5,
   1395               'Missing space before else')
   1396 
   1397     # You shouldn't have spaces before your brackets, except maybe after
   1398     # 'delete []' or 'new char * []'.
   1399     if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
   1400         error(line_number, 'whitespace/braces', 5,
   1401               'Extra space before [')
   1402 
   1403     # You shouldn't have a space before a semicolon at the end of the line.
   1404     # There's a special case for "for" since the style guide allows space before
   1405     # the semicolon there.
   1406     if search(r':\s*;\s*$', line):
   1407         error(line_number, 'whitespace/semicolon', 5,
   1408               'Semicolon defining empty statement. Use { } instead.')
   1409     elif search(r'^\s*;\s*$', line):
   1410         error(line_number, 'whitespace/semicolon', 5,
   1411               'Line contains only semicolon. If this should be an empty statement, '
   1412               'use { } instead.')
   1413     elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
   1414         error(line_number, 'whitespace/semicolon', 5,
   1415               'Extra space before last semicolon. If this should be an empty '
   1416               'statement, use { } instead.')
   1417     elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
   1418           and line.count('(') == line.count(')')
   1419           # Allow do {} while();
   1420           and not search(r'}\s*while', line)):
   1421         error(line_number, 'whitespace/semicolon', 5,
   1422               'Semicolon defining empty statement for this loop. Use { } instead.')
   1423 
   1424 
   1425 def get_previous_non_blank_line(clean_lines, line_number):
   1426     """Return the most recent non-blank line and its line number.
   1427 
   1428     Args:
   1429       clean_lines: A CleansedLines instance containing the file contents.
   1430       line_number: The number of the line to check.
   1431 
   1432     Returns:
   1433       A tuple with two elements.  The first element is the contents of the last
   1434       non-blank line before the current line, or the empty string if this is the
   1435       first non-blank line.  The second is the line number of that line, or -1
   1436       if this is the first non-blank line.
   1437     """
   1438 
   1439     previous_line_number = line_number - 1
   1440     while previous_line_number >= 0:
   1441         previous_line = clean_lines.elided[previous_line_number]
   1442         if not is_blank_line(previous_line):     # if not a blank line...
   1443             return (previous_line, previous_line_number)
   1444         previous_line_number -= 1
   1445     return ('', -1)
   1446 
   1447 
   1448 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
   1449     """Looks for indentation errors inside of namespaces.
   1450 
   1451     Args:
   1452       clean_lines: A CleansedLines instance containing the file.
   1453       line_number: The number of the line to check.
   1454       file_extension: The extension (dot not included) of the file.
   1455       file_state: A _FileState instance which maintains information about
   1456                   the state of things in the file.
   1457       error: The function to call with any errors found.
   1458     """
   1459 
   1460     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1461 
   1462     namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
   1463     if not namespace_match:
   1464         return
   1465 
   1466     current_indentation_level = len(namespace_match.group('namespace_indentation'))
   1467     if current_indentation_level > 0:
   1468         # Don't warn about an indented namespace if we already warned about indented code.
   1469         if not file_state.did_inside_namespace_indent_warning():
   1470             error(line_number, 'whitespace/indent', 4,
   1471                   'namespace should never be indented.')
   1472         return
   1473     looking_for_semicolon = False;
   1474     line_offset = 0
   1475     in_preprocessor_directive = False;
   1476     for current_line in clean_lines.elided[line_number + 1:]:
   1477         line_offset += 1
   1478         if not current_line.strip():
   1479             continue
   1480         if not current_indentation_level:
   1481             if not (in_preprocessor_directive or looking_for_semicolon):
   1482                 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
   1483                     file_state.set_did_inside_namespace_indent_warning()
   1484                     error(line_number + line_offset, 'whitespace/indent', 4,
   1485                           'Code inside a namespace should not be indented.')
   1486             if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax.
   1487                 in_preprocessor_directive = current_line[-1] == '\\'
   1488             else:
   1489                 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\')
   1490         else:
   1491             looking_for_semicolon = False; # If we have a brace we may not need a semicolon.
   1492         current_indentation_level += current_line.count('{') - current_line.count('}')
   1493         if current_indentation_level < 0:
   1494             break;
   1495 
   1496 def check_using_std(file_extension, clean_lines, line_number, error):
   1497     """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'.
   1498 
   1499     Args:
   1500       file_extension: The extension of the current file, without the leading dot.
   1501       clean_lines: A CleansedLines instance containing the file.
   1502       line_number: The number of the line to check.
   1503       error: The function to call with any errors found.
   1504     """
   1505 
   1506     # This check doesn't apply to C or Objective-C implementation files.
   1507     if is_c_or_objective_c(file_extension):
   1508         return
   1509 
   1510     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1511 
   1512     using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line)
   1513     if not using_std_match:
   1514         return
   1515 
   1516     method_name = using_std_match.group('method_name')
   1517     error(line_number, 'build/using_std', 4,
   1518           "Use 'using namespace std;' instead of 'using std::%s;'." % method_name)
   1519 
   1520 
   1521 def check_max_min_macros(file_extension, clean_lines, line_number, error):
   1522     """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min().
   1523 
   1524     Args:
   1525       file_extension: The extension of the current file, without the leading dot.
   1526       clean_lines: A CleansedLines instance containing the file.
   1527       line_number: The number of the line to check.
   1528       error: The function to call with any errors found.
   1529     """
   1530 
   1531     # This check doesn't apply to C or Objective-C implementation files.
   1532     if is_c_or_objective_c(file_extension):
   1533         return
   1534 
   1535     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1536 
   1537     max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line)
   1538     if not max_min_macros_search:
   1539         return
   1540 
   1541     max_min_macro = max_min_macros_search.group('max_min_macro')
   1542     max_min_macro_lower = max_min_macro.lower()
   1543     error(line_number, 'runtime/max_min_macros', 4,
   1544           'Use std::%s() or std::%s<type>() instead of the %s() macro.'
   1545           % (max_min_macro_lower, max_min_macro_lower, max_min_macro))
   1546 
   1547 
   1548 def check_switch_indentation(clean_lines, line_number, error):
   1549     """Looks for indentation errors inside of switch statements.
   1550 
   1551     Args:
   1552       clean_lines: A CleansedLines instance containing the file.
   1553       line_number: The number of the line to check.
   1554       error: The function to call with any errors found.
   1555     """
   1556 
   1557     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1558 
   1559     switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
   1560     if not switch_match:
   1561         return
   1562 
   1563     switch_indentation = switch_match.group('switch_indentation')
   1564     inner_indentation = switch_indentation + ' ' * 4
   1565     line_offset = 0
   1566     encountered_nested_switch = False
   1567 
   1568     for current_line in clean_lines.elided[line_number + 1:]:
   1569         line_offset += 1
   1570 
   1571         # Skip not only empty lines but also those with preprocessor directives.
   1572         if current_line.strip() == '' or current_line.startswith('#'):
   1573             continue
   1574 
   1575         if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line):
   1576             # Complexity alarm - another switch statement nested inside the one
   1577             # that we're currently testing. We'll need to track the extent of
   1578             # that inner switch if the upcoming label tests are still supposed
   1579             # to work correctly. Let's not do that; instead, we'll finish
   1580             # checking this line, and then leave it like that. Assuming the
   1581             # indentation is done consistently (even if incorrectly), this will
   1582             # still catch all indentation issues in practice.
   1583             encountered_nested_switch = True
   1584 
   1585         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   1586         current_indentation = current_indentation_match.group('indentation')
   1587         remaining_line = current_indentation_match.group('remaining_line')
   1588 
   1589         # End the check at the end of the switch statement.
   1590         if remaining_line.startswith('}') and current_indentation == switch_indentation:
   1591             break
   1592         # Case and default branches should not be indented. The regexp also
   1593         # catches single-line cases like "default: break;" but does not trigger
   1594         # on stuff like "Document::Foo();".
   1595         elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
   1596             if current_indentation != switch_indentation:
   1597                 error(line_number + line_offset, 'whitespace/indent', 4,
   1598                       'A case label should not be indented, but line up with its switch statement.')
   1599                 # Don't throw an error for multiple badly indented labels,
   1600                 # one should be enough to figure out the problem.
   1601                 break
   1602         # We ignore goto labels at the very beginning of a line.
   1603         elif match(r'\w+\s*:\s*$', remaining_line):
   1604             continue
   1605         # It's not a goto label, so check if it's indented at least as far as
   1606         # the switch statement plus one more level of indentation.
   1607         elif not current_indentation.startswith(inner_indentation):
   1608             error(line_number + line_offset, 'whitespace/indent', 4,
   1609                   'Non-label code inside switch statements should be indented.')
   1610             # Don't throw an error for multiple badly indented statements,
   1611             # one should be enough to figure out the problem.
   1612             break
   1613 
   1614         if encountered_nested_switch:
   1615             break
   1616 
   1617 
   1618 def check_braces(clean_lines, line_number, error):
   1619     """Looks for misplaced braces (e.g. at the end of line).
   1620 
   1621     Args:
   1622       clean_lines: A CleansedLines instance containing the file.
   1623       line_number: The number of the line to check.
   1624       error: The function to call with any errors found.
   1625     """
   1626 
   1627     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1628 
   1629     if match(r'\s*{\s*$', line):
   1630         # We allow an open brace to start a line in the case where someone
   1631         # is using braces for function definition or in a block to
   1632         # explicitly create a new scope, which is commonly used to control
   1633         # the lifetime of stack-allocated variables.  We don't detect this
   1634         # perfectly: we just don't complain if the last non-whitespace
   1635         # character on the previous non-blank line is ';', ':', '{', '}',
   1636         # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
   1637         # We also allow '#' for #endif and '=' for array initialization.
   1638         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   1639         if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line)
   1640              or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
   1641             and previous_line.find('#') < 0):
   1642             error(line_number, 'whitespace/braces', 4,
   1643                   'This { should be at the end of the previous line')
   1644     elif (search(r'\)\s*(const\s*)?{\s*$', line)
   1645           and line.count('(') == line.count(')')
   1646           and not search(r'\b(if|for|foreach|while|switch)\b', line)
   1647           and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
   1648         error(line_number, 'whitespace/braces', 4,
   1649               'Place brace on its own line for function definitions.')
   1650 
   1651     if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1):
   1652         # We check if a closed brace has started a line to see if a
   1653         # one line control statement was previous.
   1654         previous_line = clean_lines.elided[line_number - 2]
   1655         if (previous_line.find('{') > 0
   1656             and search(r'\b(if|for|foreach|while|else)\b', previous_line)):
   1657             error(line_number, 'whitespace/braces', 4,
   1658                   'One line control clauses should not use braces.')
   1659 
   1660     # An else clause should be on the same line as the preceding closing brace.
   1661     if match(r'\s*else\s*', line):
   1662         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
   1663         if match(r'\s*}\s*$', previous_line):
   1664             error(line_number, 'whitespace/newline', 4,
   1665                   'An else should appear on the same line as the preceding }')
   1666 
   1667     # Likewise, an else should never have the else clause on the same line
   1668     if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
   1669         error(line_number, 'whitespace/newline', 4,
   1670               'Else clause should never be on same line as else (use 2 lines)')
   1671 
   1672     # In the same way, a do/while should never be on one line
   1673     if match(r'\s*do [^\s{]', line):
   1674         error(line_number, 'whitespace/newline', 4,
   1675               'do/while clauses should not be on a single line')
   1676 
   1677     # Braces shouldn't be followed by a ; unless they're defining a struct
   1678     # or initializing an array.
   1679     # We can't tell in general, but we can for some common cases.
   1680     previous_line_number = line_number
   1681     while True:
   1682         (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
   1683         if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
   1684             line = previous_line + line
   1685         else:
   1686             break
   1687     if (search(r'{.*}\s*;', line)
   1688         and line.count('{') == line.count('}')
   1689         and not search(r'struct|class|enum|\s*=\s*{', line)):
   1690         error(line_number, 'readability/braces', 4,
   1691               "You don't need a ; after a }")
   1692 
   1693 
   1694 def check_exit_statement_simplifications(clean_lines, line_number, error):
   1695     """Looks for else or else-if statements that should be written as an
   1696     if statement when the prior if concludes with a return, break, continue or
   1697     goto statement.
   1698 
   1699     Args:
   1700       clean_lines: A CleansedLines instance containing the file.
   1701       line_number: The number of the line to check.
   1702       error: The function to call with any errors found.
   1703     """
   1704 
   1705     line = clean_lines.elided[line_number] # Get rid of comments and strings.
   1706 
   1707     else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
   1708     if not else_match:
   1709         return
   1710 
   1711     else_indentation = else_match.group('else_indentation')
   1712     inner_indentation = else_indentation + ' ' * 4
   1713 
   1714     previous_lines = clean_lines.elided[:line_number]
   1715     previous_lines.reverse()
   1716     line_offset = 0
   1717     encountered_exit_statement = False
   1718 
   1719     for current_line in previous_lines:
   1720         line_offset -= 1
   1721 
   1722         # Skip not only empty lines but also those with preprocessor directives
   1723         # and goto labels.
   1724         if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line):
   1725             continue
   1726 
   1727         # Skip lines with closing braces on the original indentation level.
   1728         # Even though the styleguide says they should be on the same line as
   1729         # the "else if" statement, we also want to check for instances where
   1730         # the current code does not comply with the coding style. Thus, ignore
   1731         # these lines and proceed to the line before that.
   1732         if current_line == else_indentation + '}':
   1733             continue
   1734 
   1735         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
   1736         current_indentation = current_indentation_match.group('indentation')
   1737         remaining_line = current_indentation_match.group('remaining_line')
   1738 
   1739         # As we're going up the lines, the first real statement to encounter
   1740         # has to be an exit statement (return, break, continue or goto) -
   1741         # otherwise, this check doesn't apply.
   1742         if not encountered_exit_statement:
   1743             # We only want to find exit statements if they are on exactly
   1744             # the same level of indentation as expected from the code inside
   1745             # the block. If the indentation doesn't strictly match then we
   1746             # might have a nested if or something, which must be ignored.
   1747             if current_indentation != inner_indentation:
   1748                 break
   1749             if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line):
   1750                 encountered_exit_statement = True
   1751                 continue
   1752             break
   1753 
   1754         # When code execution reaches this point, we've found an exit statement
   1755         # as last statement of the previous block. Now we only need to make
   1756         # sure that the block belongs to an "if", then we can throw an error.
   1757 
   1758         # Skip lines with opening braces on the original indentation level,
   1759         # similar to the closing braces check above. ("if (condition)\n{")
   1760         if current_line == else_indentation + '{':
   1761             continue
   1762 
   1763         # Skip everything that's further indented than our "else" or "else if".
   1764         if current_indentation.startswith(else_indentation) and current_indentation != else_indentation:
   1765             continue
   1766 
   1767         # So we've got a line with same (or less) indentation. Is it an "if"?
   1768         # If yes: throw an error. If no: don't throw an error.
   1769         # Whatever the outcome, this is the end of our loop.
   1770         if match(r'if\s*\(', remaining_line):
   1771             if else_match.start('else') != -1:
   1772                 error(line_number + line_offset, 'readability/control_flow', 4,
   1773                       'An else statement can be removed when the prior "if" '
   1774                       'concludes with a return, break, continue or goto statement.')
   1775             else:
   1776                 error(line_number + line_offset, 'readability/control_flow', 4,
   1777                       'An else if statement should be written as an if statement '
   1778                       'when the prior "if" concludes with a return, break, '
   1779                       'continue or goto statement.')
   1780         break
   1781 
   1782 
   1783 def replaceable_check(operator, macro, line):
   1784     """Determine whether a basic CHECK can be replaced with a more specific one.
   1785 
   1786     For example suggest using CHECK_EQ instead of CHECK(a == b) and
   1787     similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   1788 
   1789     Args:
   1790       operator: The C++ operator used in the CHECK.
   1791       macro: The CHECK or EXPECT macro being called.
   1792       line: The current source line.
   1793 
   1794     Returns:
   1795       True if the CHECK can be replaced with a more specific one.
   1796     """
   1797 
   1798     # This matches decimal and hex integers, strings, and chars (in that order).
   1799     match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   1800 
   1801     # Expression to match two sides of the operator with something that
   1802     # looks like a literal, since CHECK(x == iterator) won't compile.
   1803     # This means we can't catch all the cases where a more specific
   1804     # CHECK is possible, but it's less annoying than dealing with
   1805     # extraneous warnings.
   1806     match_this = (r'\s*' + macro + r'\((\s*' +
   1807                   match_constant + r'\s*' + operator + r'[^<>].*|'
   1808                   r'.*[^<>]' + operator + r'\s*' + match_constant +
   1809                   r'\s*\))')
   1810 
   1811     # Don't complain about CHECK(x == NULL) or similar because
   1812     # CHECK_EQ(x, NULL) won't compile (requires a cast).
   1813     # Also, don't complain about more complex boolean expressions
   1814     # involving && or || such as CHECK(a == b || c == d).
   1815     return match(match_this, line) and not search(r'NULL|&&|\|\|', line)
   1816 
   1817 
   1818 def check_check(clean_lines, line_number, error):
   1819     """Checks the use of CHECK and EXPECT macros.
   1820 
   1821     Args:
   1822       clean_lines: A CleansedLines instance containing the file.
   1823       line_number: The number of the line to check.
   1824       error: The function to call with any errors found.
   1825     """
   1826 
   1827     # Decide the set of replacement macros that should be suggested
   1828     raw_lines = clean_lines.raw_lines
   1829     current_macro = ''
   1830     for macro in _CHECK_MACROS:
   1831         if raw_lines[line_number].find(macro) >= 0:
   1832             current_macro = macro
   1833             break
   1834     if not current_macro:
   1835         # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   1836         return
   1837 
   1838     line = clean_lines.elided[line_number]        # get rid of comments and strings
   1839 
   1840     # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   1841     for operator in ['==', '!=', '>=', '>', '<=', '<']:
   1842         if replaceable_check(operator, current_macro, line):
   1843             error(line_number, 'readability/check', 2,
   1844                   'Consider using %s instead of %s(a %s b)' % (
   1845                       _CHECK_REPLACEMENT[current_macro][operator],
   1846                       current_macro, operator))
   1847             break
   1848 
   1849 
   1850 def check_for_comparisons_to_zero(clean_lines, line_number, error):
   1851     # Get the line without comments and strings.
   1852     line = clean_lines.elided[line_number]
   1853 
   1854     # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
   1855     if search(r'[=!]=\s*(NULL|0|true|false)\W', line) or search(r'\W(NULL|0|true|false)\s*[=!]=', line):
   1856         error(line_number, 'readability/comparison_to_zero', 5,
   1857               'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
   1858 
   1859 
   1860 def check_for_null(file_extension, clean_lines, line_number, error):
   1861     # This check doesn't apply to C or Objective-C implementation files.
   1862     if is_c_or_objective_c(file_extension):
   1863         return
   1864 
   1865     line = clean_lines.elided[line_number]
   1866 
   1867     # Don't warn about NULL usage in g_object_{get,set}(). See Bug 32858
   1868     if search(r'\bg_object_[sg]et\b', line):
   1869         return
   1870 
   1871     if search(r'\bNULL\b', line):
   1872         error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
   1873         return
   1874 
   1875     line = clean_lines.raw_lines[line_number]
   1876     # See if NULL occurs in any comments in the line. If the search for NULL using the raw line
   1877     # matches, then do the check with strings collapsed to avoid giving errors for
   1878     # NULLs occurring in strings.
   1879     if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)):
   1880         error(line_number, 'readability/null', 4, 'Use 0 instead of NULL.')
   1881 
   1882 def get_line_width(line):
   1883     """Determines the width of the line in column positions.
   1884 
   1885     Args:
   1886       line: A string, which may be a Unicode string.
   1887 
   1888     Returns:
   1889       The width of the line in column positions, accounting for Unicode
   1890       combining characters and wide characters.
   1891     """
   1892     if isinstance(line, unicode):
   1893         width = 0
   1894         for c in unicodedata.normalize('NFC', line):
   1895             if unicodedata.east_asian_width(c) in ('W', 'F'):
   1896                 width += 2
   1897             elif not unicodedata.combining(c):
   1898                 width += 1
   1899         return width
   1900     return len(line)
   1901 
   1902 
   1903 def check_style(clean_lines, line_number, file_extension, file_state, error):
   1904     """Checks rules from the 'C++ style rules' section of cppguide.html.
   1905 
   1906     Most of these rules are hard to test (naming, comment style), but we
   1907     do what we can.  In particular we check for 4-space indents, line lengths,
   1908     tab usage, spaces inside code, etc.
   1909 
   1910     Args:
   1911       clean_lines: A CleansedLines instance containing the file.
   1912       line_number: The number of the line to check.
   1913       file_extension: The extension (without the dot) of the filename.
   1914       file_state: A _FileState instance which maintains information about
   1915                   the state of things in the file.
   1916       error: The function to call with any errors found.
   1917     """
   1918 
   1919     raw_lines = clean_lines.raw_lines
   1920     line = raw_lines[line_number]
   1921 
   1922     if line.find('\t') != -1:
   1923         error(line_number, 'whitespace/tab', 1,
   1924               'Tab found; better to use spaces')
   1925 
   1926     # One or three blank spaces at the beginning of the line is weird; it's
   1927     # hard to reconcile that with 4-space indents.
   1928     # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
   1929     # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
   1930     # if(RLENGTH > 20) complain = 0;
   1931     # if(match($0, " +(error|private|public|protected):")) complain = 0;
   1932     # if(match(prev, "&& *$")) complain = 0;
   1933     # if(match(prev, "\\|\\| *$")) complain = 0;
   1934     # if(match(prev, "[\",=><] *$")) complain = 0;
   1935     # if(match($0, " <<")) complain = 0;
   1936     # if(match(prev, " +for \\(")) complain = 0;
   1937     # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
   1938     initial_spaces = 0
   1939     cleansed_line = clean_lines.elided[line_number]
   1940     while initial_spaces < len(line) and line[initial_spaces] == ' ':
   1941         initial_spaces += 1
   1942     if line and line[-1].isspace():
   1943         error(line_number, 'whitespace/end_of_line', 4,
   1944               'Line ends in whitespace.  Consider deleting these extra spaces.')
   1945     # There are certain situations we allow one space, notably for labels
   1946     elif ((initial_spaces >= 1 and initial_spaces <= 3)
   1947           and not match(r'\s*\w+\s*:\s*$', cleansed_line)):
   1948         error(line_number, 'whitespace/indent', 3,
   1949               'Weird number of spaces at line-start.  '
   1950               'Are you using a 4-space indent?')
   1951     # Labels should always be indented at least one space.
   1952     elif not initial_spaces and line[:2] != '//':
   1953         label_match = match(r'(?P<label>[^:]+):\s*$', line)
   1954 
   1955         if label_match:
   1956             label = label_match.group('label')
   1957             # Only throw errors for stuff that is definitely not a goto label,
   1958             # because goto labels can in fact occur at the start of the line.
   1959             if label in ['public', 'private', 'protected'] or label.find(' ') != -1:
   1960                 error(line_number, 'whitespace/labels', 4,
   1961                       'Labels should always be indented at least one space.  '
   1962                       'If this is a member-initializer list in a constructor, '
   1963                       'the colon should be on the line after the definition header.')
   1964 
   1965     if (cleansed_line.count(';') > 1
   1966         # for loops are allowed two ;'s (and may run over two lines).
   1967         and cleansed_line.find('for') == -1
   1968         and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1
   1969              or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1)
   1970         # It's ok to have many commands in a switch case that fits in 1 line
   1971         and not ((cleansed_line.find('case ') != -1
   1972                   or cleansed_line.find('default:') != -1)
   1973                  and cleansed_line.find('break;') != -1)
   1974         and not cleansed_line.startswith('#define ')):
   1975         error(line_number, 'whitespace/newline', 4,
   1976               'More than one command on the same line')
   1977 
   1978     if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'):
   1979         error(line_number, 'whitespace/operators', 4,
   1980               'Boolean expressions that span multiple lines should have their '
   1981               'operators on the left side of the line instead of the right side.')
   1982 
   1983     # Some more style checks
   1984     check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
   1985     check_using_std(file_extension, clean_lines, line_number, error)
   1986     check_max_min_macros(file_extension, clean_lines, line_number, error)
   1987     check_switch_indentation(clean_lines, line_number, error)
   1988     check_braces(clean_lines, line_number, error)
   1989     check_exit_statement_simplifications(clean_lines, line_number, error)
   1990     check_spacing(file_extension, clean_lines, line_number, error)
   1991     check_check(clean_lines, line_number, error)
   1992     check_for_comparisons_to_zero(clean_lines, line_number, error)
   1993     check_for_null(file_extension, clean_lines, line_number, error)
   1994 
   1995 
   1996 _RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
   1997 _RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
   1998 # Matches the first component of a filename delimited by -s and _s. That is:
   1999 #  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
   2000 #  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
   2001 #  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
   2002 #  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
   2003 _RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2004 
   2005 
   2006 def _drop_common_suffixes(filename):
   2007     """Drops common suffixes like _test.cpp or -inl.h from filename.
   2008 
   2009     For example:
   2010       >>> _drop_common_suffixes('foo/foo-inl.h')
   2011       'foo/foo'
   2012       >>> _drop_common_suffixes('foo/bar/foo.cpp')
   2013       'foo/bar/foo'
   2014       >>> _drop_common_suffixes('foo/foo_internal.h')
   2015       'foo/foo'
   2016       >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
   2017       'foo/foo_unusualinternal'
   2018 
   2019     Args:
   2020       filename: The input filename.
   2021 
   2022     Returns:
   2023       The filename with the common suffix removed.
   2024     """
   2025     for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
   2026                    'inl.h', 'impl.h', 'internal.h'):
   2027         if (filename.endswith(suffix) and len(filename) > len(suffix)
   2028             and filename[-len(suffix) - 1] in ('-', '_')):
   2029             return filename[:-len(suffix) - 1]
   2030     return os.path.splitext(filename)[0]
   2031 
   2032 
   2033 def _classify_include(filename, include, is_system, include_state):
   2034     """Figures out what kind of header 'include' is.
   2035 
   2036     Args:
   2037       filename: The current file cpp_style is running over.
   2038       include: The path to a #included file.
   2039       is_system: True if the #include used <> rather than "".
   2040       include_state: An _IncludeState instance in which the headers are inserted.
   2041 
   2042     Returns:
   2043       One of the _XXX_HEADER constants.
   2044 
   2045     For example:
   2046       >>> _classify_include('foo.cpp', 'config.h', False)
   2047       _CONFIG_HEADER
   2048       >>> _classify_include('foo.cpp', 'foo.h', False)
   2049       _PRIMARY_HEADER
   2050       >>> _classify_include('foo.cpp', 'bar.h', False)
   2051       _OTHER_HEADER
   2052     """
   2053 
   2054     # If it is a system header we know it is classified as _OTHER_HEADER.
   2055     if is_system:
   2056         return _OTHER_HEADER
   2057 
   2058     # If the include is named config.h then this is WebCore/config.h.
   2059     if include == "config.h":
   2060         return _CONFIG_HEADER
   2061 
   2062     # There cannot be primary includes in header files themselves. Only an
   2063     # include exactly matches the header filename will be is flagged as
   2064     # primary, so that it triggers the "don't include yourself" check.
   2065     if filename.endswith('.h') and filename != include:
   2066         return _OTHER_HEADER;
   2067 
   2068     # Qt's moc files do not follow the naming and ordering rules, so they should be skipped
   2069     if include.startswith('moc_') and include.endswith('.cpp'):
   2070         return _MOC_HEADER
   2071 
   2072     if include.endswith('.moc'):
   2073         return _MOC_HEADER
   2074 
   2075     # If the target file basename starts with the include we're checking
   2076     # then we consider it the primary header.
   2077     target_base = FileInfo(filename).base_name()
   2078     include_base = FileInfo(include).base_name()
   2079 
   2080     # If we haven't encountered a primary header, then be lenient in checking.
   2081     if not include_state.visited_primary_section() and target_base.find(include_base) != -1:
   2082         return _PRIMARY_HEADER
   2083     # If we already encountered a primary header, perform a strict comparison.
   2084     # In case the two filename bases are the same then the above lenient check
   2085     # probably was a false positive.
   2086     elif include_state.visited_primary_section() and target_base == include_base:
   2087         if include == "ResourceHandleWin.h":
   2088             # FIXME: Thus far, we've only seen one example of these, but if we
   2089             # start to see more, please consider generalizing this check
   2090             # somehow.
   2091             return _OTHER_HEADER
   2092         return _PRIMARY_HEADER
   2093 
   2094     return _OTHER_HEADER
   2095 
   2096 
   2097 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
   2098     """Check rules that are applicable to #include lines.
   2099 
   2100     Strings on #include lines are NOT removed from elided line, to make
   2101     certain tasks easier. However, to prevent false positives, checks
   2102     applicable to #include lines in CheckLanguage must be put here.
   2103 
   2104     Args:
   2105       filename: The name of the current file.
   2106       file_extension: The current file extension, without the leading dot.
   2107       clean_lines: A CleansedLines instance containing the file.
   2108       line_number: The number of the line to check.
   2109       include_state: An _IncludeState instance in which the headers are inserted.
   2110       error: The function to call with any errors found.
   2111     """
   2112     # FIXME: For readability or as a possible optimization, consider
   2113     #        exiting early here by checking whether the "build/include"
   2114     #        category should be checked for the given filename.  This
   2115     #        may involve having the error handler classes expose a
   2116     #        should_check() method, in addition to the usual __call__
   2117     #        method.
   2118     line = clean_lines.lines[line_number]
   2119 
   2120     matched = _RE_PATTERN_INCLUDE.search(line)
   2121     if not matched:
   2122         return
   2123 
   2124     include = matched.group(2)
   2125     is_system = (matched.group(1) == '<')
   2126 
   2127     # Look for any of the stream classes that are part of standard C++.
   2128     if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   2129         error(line_number, 'readability/streams', 3,
   2130               'Streams are highly discouraged.')
   2131 
   2132     # Look for specific includes to fix.
   2133     if include.startswith('wtf/') and not is_system:
   2134         error(line_number, 'build/include', 4,
   2135               'wtf includes should be <wtf/file.h> instead of "wtf/file.h".')
   2136 
   2137     duplicate_header = include in include_state
   2138     if duplicate_header:
   2139         error(line_number, 'build/include', 4,
   2140               '"%s" already included at %s:%s' %
   2141               (include, filename, include_state[include]))
   2142     else:
   2143         include_state[include] = line_number
   2144 
   2145     header_type = _classify_include(filename, include, is_system, include_state)
   2146     include_state.header_types[line_number] = header_type
   2147 
   2148     # Only proceed if this isn't a duplicate header.
   2149     if duplicate_header:
   2150         return
   2151 
   2152     # We want to ensure that headers appear in the right order:
   2153     # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
   2154     # 2) for header files: alphabetically sorted
   2155     # The include_state object keeps track of the last type seen
   2156     # and complains if the header types are out of order or missing.
   2157     error_message = include_state.check_next_include_order(header_type, file_extension == "h")
   2158 
   2159     # Check to make sure we have a blank line after primary header.
   2160     if not error_message and header_type == _PRIMARY_HEADER:
   2161          next_line = clean_lines.raw_lines[line_number + 1]
   2162          if not is_blank_line(next_line):
   2163             error(line_number, 'build/include_order', 4,
   2164                   'You should add a blank line after implementation file\'s own header.')
   2165 
   2166     # Check to make sure all headers besides config.h and the primary header are
   2167     # alphabetically sorted. Skip Qt's moc files.
   2168     if not error_message and header_type == _OTHER_HEADER:
   2169          previous_line_number = line_number - 1;
   2170          previous_line = clean_lines.lines[previous_line_number]
   2171          previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   2172          while (not previous_match and previous_line_number > 0
   2173                 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
   2174             previous_line_number -= 1;
   2175             previous_line = clean_lines.lines[previous_line_number]
   2176             previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
   2177          if previous_match:
   2178             previous_header_type = include_state.header_types[previous_line_number]
   2179             if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
   2180                 error(line_number, 'build/include_order', 4,
   2181                       'Alphabetical sorting problem.')
   2182 
   2183     if error_message:
   2184         if file_extension == 'h':
   2185             error(line_number, 'build/include_order', 4,
   2186                   '%s Should be: alphabetically sorted.' %
   2187                   error_message)
   2188         else:
   2189             error(line_number, 'build/include_order', 4,
   2190                   '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
   2191                   error_message)
   2192 
   2193 
   2194 def check_language(filename, clean_lines, line_number, file_extension, include_state,
   2195                    error):
   2196     """Checks rules from the 'C++ language rules' section of cppguide.html.
   2197 
   2198     Some of these rules are hard to test (function overloading, using
   2199     uint32 inappropriately), but we do the best we can.
   2200 
   2201     Args:
   2202       filename: The name of the current file.
   2203       clean_lines: A CleansedLines instance containing the file.
   2204       line_number: The number of the line to check.
   2205       file_extension: The extension (without the dot) of the filename.
   2206       include_state: An _IncludeState instance in which the headers are inserted.
   2207       error: The function to call with any errors found.
   2208     """
   2209     # If the line is empty or consists of entirely a comment, no need to
   2210     # check it.
   2211     line = clean_lines.elided[line_number]
   2212     if not line:
   2213         return
   2214 
   2215     matched = _RE_PATTERN_INCLUDE.search(line)
   2216     if matched:
   2217         check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
   2218         return
   2219 
   2220     # FIXME: figure out if they're using default arguments in fn proto.
   2221 
   2222     # Check to see if they're using an conversion function cast.
   2223     # I just try to capture the most common basic types, though there are more.
   2224     # Parameterless conversion functions, such as bool(), are allowed as they are
   2225     # probably a member operator declaration or default constructor.
   2226     matched = search(
   2227         r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   2228     if matched:
   2229         # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   2230         # where type may be float(), int(string), etc.  Without context they are
   2231         # virtually indistinguishable from int(x) casts.
   2232         if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
   2233             error(line_number, 'readability/casting', 4,
   2234                   'Using deprecated casting style.  '
   2235                   'Use static_cast<%s>(...) instead' %
   2236                   matched.group(1))
   2237 
   2238     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   2239                        'static_cast',
   2240                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
   2241                        error)
   2242     # This doesn't catch all cases.  Consider (const char * const)"hello".
   2243     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
   2244                        'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   2245 
   2246     # In addition, we look for people taking the address of a cast.  This
   2247     # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   2248     # point where you think.
   2249     if search(
   2250         r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   2251         error(line_number, 'runtime/casting', 4,
   2252               ('Are you taking an address of a cast?  '
   2253                'This is dangerous: could be a temp var.  '
   2254                'Take the address before doing the cast, rather than after'))
   2255 
   2256     # Check for people declaring static/global STL strings at the top level.
   2257     # This is dangerous because the C++ language does not guarantee that
   2258     # globals with constructors are initialized before the first access.
   2259     matched = match(
   2260         r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   2261         line)
   2262     # Make sure it's not a function.
   2263     # Function template specialization looks like: "string foo<Type>(...".
   2264     # Class template definitions look like: "string Foo<Type>::Method(...".
   2265     if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   2266                              matched.group(3)):
   2267         error(line_number, 'runtime/string', 4,
   2268               'For a static/global string constant, use a C style string instead: '
   2269               '"%schar %s[]".' %
   2270               (matched.group(1), matched.group(2)))
   2271 
   2272     # Check that we're not using RTTI outside of testing code.
   2273     if search(r'\bdynamic_cast<', line):
   2274         error(line_number, 'runtime/rtti', 5,
   2275               'Do not use dynamic_cast<>.  If you need to cast within a class '
   2276               "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   2277               'RTTI.')
   2278 
   2279     if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   2280         error(line_number, 'runtime/init', 4,
   2281               'You seem to be initializing a member variable with itself.')
   2282 
   2283     if file_extension == 'h':
   2284         # FIXME: check that 1-arg constructors are explicit.
   2285         #        How to tell it's a constructor?
   2286         #        (handled in check_for_non_standard_constructs for now)
   2287         pass
   2288 
   2289     # Check if people are using the verboten C basic types.  The only exception
   2290     # we regularly allow is "unsigned short port" for port.
   2291     if search(r'\bshort port\b', line):
   2292         if not search(r'\bunsigned short port\b', line):
   2293             error(line_number, 'runtime/int', 4,
   2294                   'Use "unsigned short" for ports, not "short"')
   2295 
   2296     # When snprintf is used, the second argument shouldn't be a literal.
   2297     matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   2298     if matched:
   2299         error(line_number, 'runtime/printf', 3,
   2300               'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   2301               'to snprintf.' % (matched.group(1), matched.group(2)))
   2302 
   2303     # Check if some verboten C functions are being used.
   2304     if search(r'\bsprintf\b', line):
   2305         error(line_number, 'runtime/printf', 5,
   2306               'Never use sprintf.  Use snprintf instead.')
   2307     matched = search(r'\b(strcpy|strcat)\b', line)
   2308     if matched:
   2309         error(line_number, 'runtime/printf', 4,
   2310               'Almost always, snprintf is better than %s' % matched.group(1))
   2311 
   2312     if search(r'\bsscanf\b', line):
   2313         error(line_number, 'runtime/printf', 1,
   2314               'sscanf can be ok, but is slow and can overflow buffers.')
   2315 
   2316     # Check for suspicious usage of "if" like
   2317     # } if (a == b) {
   2318     if search(r'\}\s*if\s*\(', line):
   2319         error(line_number, 'readability/braces', 4,
   2320               'Did you mean "else if"? If not, start a new line for "if".')
   2321 
   2322     # Check for potential format string bugs like printf(foo).
   2323     # We constrain the pattern not to pick things like DocidForPrintf(foo).
   2324     # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   2325     matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
   2326     if matched:
   2327         error(line_number, 'runtime/printf', 4,
   2328               'Potential format string bug. Do %s("%%s", %s) instead.'
   2329               % (matched.group(1), matched.group(2)))
   2330 
   2331     # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   2332     matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   2333     if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
   2334         error(line_number, 'runtime/memset', 4,
   2335               'Did you mean "memset(%s, 0, %s)"?'
   2336               % (matched.group(1), matched.group(2)))
   2337 
   2338     # Detect variable-length arrays.
   2339     matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   2340     if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
   2341         matched.group(3).find(']') == -1):
   2342         # Split the size using space and arithmetic operators as delimiters.
   2343         # If any of the resulting tokens are not compile time constants then
   2344         # report the error.
   2345         tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3))
   2346         is_const = True
   2347         skip_next = False
   2348         for tok in tokens:
   2349             if skip_next:
   2350                 skip_next = False
   2351                 continue
   2352 
   2353             if search(r'sizeof\(.+\)', tok):
   2354                 continue
   2355             if search(r'arraysize\(\w+\)', tok):
   2356                 continue
   2357 
   2358             tok = tok.lstrip('(')
   2359             tok = tok.rstrip(')')
   2360             if not tok:
   2361                 continue
   2362             if match(r'\d+', tok):
   2363                 continue
   2364             if match(r'0[xX][0-9a-fA-F]+', tok):
   2365                 continue
   2366             if match(r'k[A-Z0-9]\w*', tok):
   2367                 continue
   2368             if match(r'(.+::)?k[A-Z0-9]\w*', tok):
   2369                 continue
   2370             if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
   2371                 continue
   2372             # A catch all for tricky sizeof cases, including 'sizeof expression',
   2373             # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   2374             # requires skipping the next token becasue we split on ' ' and '*'.
   2375             if tok.startswith('sizeof'):
   2376                 skip_next = True
   2377                 continue
   2378             is_const = False
   2379             break
   2380         if not is_const:
   2381             error(line_number, 'runtime/arrays', 1,
   2382                   'Do not use variable-length arrays.  Use an appropriately named '
   2383                   "('k' followed by CamelCase) compile-time constant for the size.")
   2384 
   2385     # Check for use of unnamed namespaces in header files.  Registration
   2386     # macros are typically OK, so we allow use of "namespace {" on lines
   2387     # that end with backslashes.
   2388     if (file_extension == 'h'
   2389         and search(r'\bnamespace\s*{', line)
   2390         and line[-1] != '\\'):
   2391         error(line_number, 'build/namespaces', 4,
   2392               'Do not use unnamed namespaces in header files.  See '
   2393               'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   2394               ' for more information.')
   2395 
   2396     check_identifier_name_in_declaration(filename, line_number, line, error)
   2397 
   2398 
   2399 def check_identifier_name_in_declaration(filename, line_number, line, error):
   2400     """Checks if identifier names contain any underscores.
   2401 
   2402     As identifiers in libraries we are using have a bunch of
   2403     underscores, we only warn about the declarations of identifiers
   2404     and don't check use of identifiers.
   2405 
   2406     Args:
   2407       filename: The name of the current file.
   2408       line_number: The number of the line to check.
   2409       line: The line of code to check.
   2410       error: The function to call with any errors found.
   2411     """
   2412     # We don't check a return statement.
   2413     if match(r'\s*(return|delete)\b', line):
   2414         return
   2415 
   2416     # Basically, a declaration is a type name followed by whitespaces
   2417     # followed by an identifier. The type name can be complicated
   2418     # due to type adjectives and templates. We remove them first to
   2419     # simplify the process to find declarations of identifiers.
   2420 
   2421     # Convert "long long", "long double", and "long long int" to
   2422     # simple types, but don't remove simple "long".
   2423     line = sub(r'long (long )?(?=long|double|int)', '', line)
   2424     line = sub(r'\b(unsigned|signed|inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)
   2425 
   2426     # Remove all template parameters by removing matching < and >.
   2427     # Loop until no templates are removed to remove nested templates.
   2428     while True:
   2429         line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
   2430         if not number_of_replacements:
   2431             break
   2432 
   2433     # Declarations of local variables can be in condition expressions
   2434     # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
   2435     # We remove the keywords and the first parenthesis.
   2436     #
   2437     # Declarations in "while", "if", and "switch" are different from
   2438     # other declarations in two aspects:
   2439     #
   2440     # - There can be only one declaration between the parentheses.
   2441     #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
   2442     # - The variable must be initialized.
   2443     #   (i.e., you cannot write "if (int i) {}")
   2444     #
   2445     # and we will need different treatments for them.
   2446     line = sub(r'^\s*for\s*\(', '', line)
   2447     line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)
   2448 
   2449     # Detect variable and functions.
   2450     type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
   2451     identifier_regexp = r'(?P<identifier>[\w:]+)'
   2452     character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
   2453     declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + character_after_identifier_regexp
   2454     declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
   2455     is_function_arguments = False
   2456     number_of_identifiers = 0
   2457     while True:
   2458         # If we are seeing the first identifier or arguments of a
   2459         # function, there should be a type name before an identifier.
   2460         if not number_of_identifiers or is_function_arguments:
   2461             declaration_regexp = declaration_with_type_regexp
   2462         else:
   2463             declaration_regexp = declaration_without_type_regexp
   2464 
   2465         matched = match(declaration_regexp, line)
   2466         if not matched:
   2467             return
   2468         identifier = matched.group('identifier')
   2469         character_after_identifier = matched.group('character_after_identifier')
   2470 
   2471         # If we removed a non-for-control statement, the character after
   2472         # the identifier should be '='. With this rule, we can avoid
   2473         # warning for cases like "if (val & INT_MAX) {".
   2474         if control_statement and character_after_identifier != '=':
   2475             return
   2476 
   2477         is_function_arguments = is_function_arguments or character_after_identifier == '('
   2478 
   2479         # Remove "m_" and "s_" to allow them.
   2480         modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
   2481         if modified_identifier.find('_') >= 0:
   2482             # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
   2483             if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('_op_') >= 0)
   2484                 and not modified_identifier.startswith('tst_')
   2485                 and not modified_identifier.startswith('webkit_dom_object_')
   2486                 and not modified_identifier.startswith('qt_')
   2487                 and not modified_identifier.find('::qt_') >= 0
   2488                 and not modified_identifier == "const_iterator"):
   2489                 error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
   2490 
   2491         # There can be only one declaration in non-for-control statements.
   2492         if control_statement:
   2493             return
   2494         # We should continue checking if this is a function
   2495         # declaration because we need to check its arguments.
   2496         # Also, we need to check multiple declarations.
   2497         if character_after_identifier != '(' and character_after_identifier != ',':
   2498             return
   2499 
   2500         number_of_identifiers += 1
   2501         line = line[matched.end():]
   2502 
   2503 
   2504 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
   2505                        error):
   2506     """Checks for a C-style cast by looking for the pattern.
   2507 
   2508     This also handles sizeof(type) warnings, due to similarity of content.
   2509 
   2510     Args:
   2511       line_number: The number of the line to check.
   2512       line: The line of code to check.
   2513       raw_line: The raw line of code to check, with comments.
   2514       cast_type: The string for the C++ cast to recommend.  This is either
   2515                  reinterpret_cast or static_cast, depending.
   2516       pattern: The regular expression used to find C-style casts.
   2517       error: The function to call with any errors found.
   2518     """
   2519     matched = search(pattern, line)
   2520     if not matched:
   2521         return
   2522 
   2523     # e.g., sizeof(int)
   2524     sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1])
   2525     if sizeof_match:
   2526         error(line_number, 'runtime/sizeof', 1,
   2527               'Using sizeof(type).  Use sizeof(varname) instead if possible')
   2528         return
   2529 
   2530     remainder = line[matched.end(0):]
   2531 
   2532     # The close paren is for function pointers as arguments to a function.
   2533     # eg, void foo(void (*bar)(int));
   2534     # The semicolon check is a more basic function check; also possibly a
   2535     # function pointer typedef.
   2536     # eg, void foo(int); or void foo(int) const;
   2537     # The equals check is for function pointer assignment.
   2538     # eg, void *(*foo)(int) = ...
   2539     #
   2540     # Right now, this will only catch cases where there's a single argument, and
   2541     # it's unnamed.  It should probably be expanded to check for multiple
   2542     # arguments with some unnamed.
   2543     function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder)
   2544     if function_match:
   2545         if (not function_match.group(3)
   2546             or function_match.group(3) == ';'
   2547             or raw_line.find('/*') < 0):
   2548             error(line_number, 'readability/function', 3,
   2549                   'All parameters should be named in a function')
   2550         return
   2551 
   2552     # At this point, all that should be left is actual casts.
   2553     error(line_number, 'readability/casting', 4,
   2554           'Using C-style cast.  Use %s<%s>(...) instead' %
   2555           (cast_type, matched.group(1)))
   2556 
   2557 
   2558 _HEADERS_CONTAINING_TEMPLATES = (
   2559     ('<deque>', ('deque',)),
   2560     ('<functional>', ('unary_function', 'binary_function',
   2561                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   2562                       'negate',
   2563                       'equal_to', 'not_equal_to', 'greater', 'less',
   2564                       'greater_equal', 'less_equal',
   2565                       'logical_and', 'logical_or', 'logical_not',
   2566                       'unary_negate', 'not1', 'binary_negate', 'not2',
   2567                       'bind1st', 'bind2nd',
   2568                       'pointer_to_unary_function',
   2569                       'pointer_to_binary_function',
   2570                       'ptr_fun',
   2571                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   2572                       'mem_fun_ref_t',
   2573                       'const_mem_fun_t', 'const_mem_fun1_t',
   2574                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   2575                       'mem_fun_ref',
   2576                      )),
   2577     ('<limits>', ('numeric_limits',)),
   2578     ('<list>', ('list',)),
   2579     ('<map>', ('map', 'multimap',)),
   2580     ('<memory>', ('allocator',)),
   2581     ('<queue>', ('queue', 'priority_queue',)),
   2582     ('<set>', ('set', 'multiset',)),
   2583     ('<stack>', ('stack',)),
   2584     ('<string>', ('char_traits', 'basic_string',)),
   2585     ('<utility>', ('pair',)),
   2586     ('<vector>', ('vector',)),
   2587 
   2588     # gcc extensions.
   2589     # Note: std::hash is their hash, ::hash is our hash
   2590     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   2591     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   2592     ('<slist>', ('slist',)),
   2593     )
   2594 
   2595 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
   2596     # We can trust with reasonable confidence that map gives us pair<>, too.
   2597     'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
   2598 }
   2599 
   2600 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   2601 
   2602 _re_pattern_algorithm_header = []
   2603 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   2604                   'transform'):
   2605     # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   2606     # type::max().
   2607     _re_pattern_algorithm_header.append(
   2608         (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   2609          _template,
   2610          '<algorithm>'))
   2611 
   2612 _re_pattern_templates = []
   2613 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   2614     for _template in _templates:
   2615         _re_pattern_templates.append(
   2616             (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   2617              _template + '<>',
   2618              _header))
   2619 
   2620 
   2621 def files_belong_to_same_module(filename_cpp, filename_h):
   2622     """Check if these two filenames belong to the same module.
   2623 
   2624     The concept of a 'module' here is a as follows:
   2625     foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
   2626     same 'module' if they are in the same directory.
   2627     some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   2628     to belong to the same module here.
   2629 
   2630     If the filename_cpp contains a longer path than the filename_h, for example,
   2631     '/absolute/path/to/base/sysinfo.cpp', and this file would include
   2632     'base/sysinfo.h', this function also produces the prefix needed to open the
   2633     header. This is used by the caller of this function to more robustly open the
   2634     header file. We don't have access to the real include paths in this context,
   2635     so we need this guesswork here.
   2636 
   2637     Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
   2638     according to this implementation. Because of this, this function gives
   2639     some false positives. This should be sufficiently rare in practice.
   2640 
   2641     Args:
   2642       filename_cpp: is the path for the .cpp file
   2643       filename_h: is the path for the header path
   2644 
   2645     Returns:
   2646       Tuple with a bool and a string:
   2647       bool: True if filename_cpp and filename_h belong to the same module.
   2648       string: the additional prefix needed to open the header file.
   2649     """
   2650 
   2651     if not filename_cpp.endswith('.cpp'):
   2652         return (False, '')
   2653     filename_cpp = filename_cpp[:-len('.cpp')]
   2654     if filename_cpp.endswith('_unittest'):
   2655         filename_cpp = filename_cpp[:-len('_unittest')]
   2656     elif filename_cpp.endswith('_test'):
   2657         filename_cpp = filename_cpp[:-len('_test')]
   2658     filename_cpp = filename_cpp.replace('/public/', '/')
   2659     filename_cpp = filename_cpp.replace('/internal/', '/')
   2660 
   2661     if not filename_h.endswith('.h'):
   2662         return (False, '')
   2663     filename_h = filename_h[:-len('.h')]
   2664     if filename_h.endswith('-inl'):
   2665         filename_h = filename_h[:-len('-inl')]
   2666     filename_h = filename_h.replace('/public/', '/')
   2667     filename_h = filename_h.replace('/internal/', '/')
   2668 
   2669     files_belong_to_same_module = filename_cpp.endswith(filename_h)
   2670     common_path = ''
   2671     if files_belong_to_same_module:
   2672         common_path = filename_cpp[:-len(filename_h)]
   2673     return files_belong_to_same_module, common_path
   2674 
   2675 
   2676 def update_include_state(filename, include_state, io=codecs):
   2677     """Fill up the include_state with new includes found from the file.
   2678 
   2679     Args:
   2680       filename: the name of the header to read.
   2681       include_state: an _IncludeState instance in which the headers are inserted.
   2682       io: The io factory to use to read the file. Provided for testability.
   2683 
   2684     Returns:
   2685       True if a header was succesfully added. False otherwise.
   2686     """
   2687     header_file = None
   2688     try:
   2689         header_file = io.open(filename, 'r', 'utf8', 'replace')
   2690     except IOError:
   2691         return False
   2692     line_number = 0
   2693     for line in header_file:
   2694         line_number += 1
   2695         clean_line = cleanse_comments(line)
   2696         matched = _RE_PATTERN_INCLUDE.search(clean_line)
   2697         if matched:
   2698             include = matched.group(2)
   2699             # The value formatting is cute, but not really used right now.
   2700             # What matters here is that the key is in include_state.
   2701             include_state.setdefault(include, '%s:%d' % (filename, line_number))
   2702     return True
   2703 
   2704 
   2705 def check_for_include_what_you_use(filename, clean_lines, include_state, error,
   2706                                    io=codecs):
   2707     """Reports for missing stl includes.
   2708 
   2709     This function will output warnings to make sure you are including the headers
   2710     necessary for the stl containers and functions that you use. We only give one
   2711     reason to include a header. For example, if you use both equal_to<> and
   2712     less<> in a .h file, only one (the latter in the file) of these will be
   2713     reported as a reason to include the <functional>.
   2714 
   2715     Args:
   2716       filename: The name of the current file.
   2717       clean_lines: A CleansedLines instance containing the file.
   2718       include_state: An _IncludeState instance.
   2719       error: The function to call with any errors found.
   2720       io: The IO factory to use to read the header file. Provided for unittest
   2721           injection.
   2722     """
   2723     required = {}  # A map of header name to line_number and the template entity.
   2724         # Example of required: { '<functional>': (1219, 'less<>') }
   2725 
   2726     for line_number in xrange(clean_lines.num_lines()):
   2727         line = clean_lines.elided[line_number]
   2728         if not line or line[0] == '#':
   2729             continue
   2730 
   2731         # String is special -- it is a non-templatized type in STL.
   2732         if _RE_PATTERN_STRING.search(line):
   2733             required['<string>'] = (line_number, 'string')
   2734 
   2735         for pattern, template, header in _re_pattern_algorithm_header:
   2736             if pattern.search(line):
   2737                 required[header] = (line_number, template)
   2738 
   2739         # The following function is just a speed up, no semantics are changed.
   2740         if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   2741             continue
   2742 
   2743         for pattern, template, header in _re_pattern_templates:
   2744             if pattern.search(line):
   2745                 required[header] = (line_number, template)
   2746 
   2747     # The policy is that if you #include something in foo.h you don't need to
   2748     # include it again in foo.cpp. Here, we will look at possible includes.
   2749     # Let's copy the include_state so it is only messed up within this function.
   2750     include_state = include_state.copy()
   2751 
   2752     # Did we find the header for this file (if any) and succesfully load it?
   2753     header_found = False
   2754 
   2755     # Use the absolute path so that matching works properly.
   2756     abs_filename = os.path.abspath(filename)
   2757 
   2758     # For Emacs's flymake.
   2759     # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
   2760     # by flymake and that file name might end with '_flymake.cpp'. In that case,
   2761     # restore original file name here so that the corresponding header file can be
   2762     # found.
   2763     # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
   2764     # instead of 'foo_flymake.h'
   2765     emacs_flymake_suffix = '_flymake.cpp'
   2766     if abs_filename.endswith(emacs_flymake_suffix):
   2767         abs_filename = abs_filename[:-len(emacs_flymake_suffix)] + '.cpp'
   2768 
   2769     # include_state is modified during iteration, so we iterate over a copy of
   2770     # the keys.
   2771     for header in include_state.keys():  #NOLINT
   2772         (same_module, common_path) = files_belong_to_same_module(abs_filename, header)
   2773         fullpath = common_path + header
   2774         if same_module and update_include_state(fullpath, include_state, io):
   2775             header_found = True
   2776 
   2777     # If we can't find the header file for a .cpp, assume it's because we don't
   2778     # know where to look. In that case we'll give up as we're not sure they
   2779     # didn't include it in the .h file.
   2780     # FIXME: Do a better job of finding .h files so we are confident that
   2781     #        not having the .h file means there isn't one.
   2782     if filename.endswith('.cpp') and not header_found:
   2783         return
   2784 
   2785     # All the lines have been processed, report the errors found.
   2786     for required_header_unstripped in required:
   2787         template = required[required_header_unstripped][1]
   2788         if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
   2789             headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
   2790             if [True for header in headers if header in include_state]:
   2791                 continue
   2792         if required_header_unstripped.strip('<>"') not in include_state:
   2793             error(required[required_header_unstripped][0],
   2794                   'build/include_what_you_use', 4,
   2795                   'Add #include ' + required_header_unstripped + ' for ' + template)
   2796 
   2797 
   2798 def process_line(filename, file_extension,
   2799                  clean_lines, line, include_state, function_state,
   2800                  class_state, file_state, error):
   2801     """Processes a single line in the file.
   2802 
   2803     Args:
   2804       filename: Filename of the file that is being processed.
   2805       file_extension: The extension (dot not included) of the file.
   2806       clean_lines: An array of strings, each representing a line of the file,
   2807                    with comments stripped.
   2808       line: Number of line being processed.
   2809       include_state: An _IncludeState instance in which the headers are inserted.
   2810       function_state: A _FunctionState instance which counts function lines, etc.
   2811       class_state: A _ClassState instance which maintains information about
   2812                    the current stack of nested class declarations being parsed.
   2813       file_state: A _FileState instance which maintains information about
   2814                   the state of things in the file.
   2815       error: A callable to which errors are reported, which takes arguments:
   2816              line number, error level, and message
   2817 
   2818     """
   2819     raw_lines = clean_lines.raw_lines
   2820     check_for_function_lengths(clean_lines, line, function_state, error)
   2821     if search(r'\bNOLINT\b', raw_lines[line]):  # ignore nolint lines
   2822         return
   2823     check_for_multiline_comments_and_strings(clean_lines, line, error)
   2824     check_style(clean_lines, line, file_extension, file_state, error)
   2825     check_language(filename, clean_lines, line, file_extension, include_state,
   2826                    error)
   2827     check_for_non_standard_constructs(clean_lines, line, class_state, error)
   2828     check_posix_threading(clean_lines, line, error)
   2829     check_invalid_increment(clean_lines, line, error)
   2830 
   2831 
   2832 def _process_lines(filename, file_extension, lines, error, verbosity):
   2833     """Performs lint checks and reports any errors to the given error function.
   2834 
   2835     Args:
   2836       filename: Filename of the file that is being processed.
   2837       file_extension: The extension (dot not included) of the file.
   2838       lines: An array of strings, each representing a line of the file, with the
   2839              last element being empty if the file is termined with a newline.
   2840       error: A callable to which errors are reported, which takes 4 arguments:
   2841     """
   2842     lines = (['// marker so line numbers and indices both start at 1'] + lines +
   2843              ['// marker so line numbers end in a known way'])
   2844 
   2845     include_state = _IncludeState()
   2846     function_state = _FunctionState(verbosity)
   2847     class_state = _ClassState()
   2848     file_state = _FileState()
   2849 
   2850     check_for_copyright(lines, error)
   2851 
   2852     if file_extension == 'h':
   2853         check_for_header_guard(filename, lines, error)
   2854 
   2855     remove_multi_line_comments(lines, error)
   2856     clean_lines = CleansedLines(lines)
   2857     for line in xrange(clean_lines.num_lines()):
   2858         process_line(filename, file_extension, clean_lines, line,
   2859                      include_state, function_state, class_state, file_state, error)
   2860     class_state.check_finished(error)
   2861 
   2862     check_for_include_what_you_use(filename, clean_lines, include_state, error)
   2863 
   2864     # We check here rather than inside process_line so that we see raw
   2865     # lines rather than "cleaned" lines.
   2866     check_for_unicode_replacement_characters(lines, error)
   2867 
   2868     check_for_new_line_at_eof(lines, error)
   2869 
   2870 
   2871 class CppProcessor(object):
   2872 
   2873     """Processes C++ lines for checking style."""
   2874 
   2875     # This list is used to--
   2876     #
   2877     # (1) generate an explicit list of all possible categories,
   2878     # (2) unit test that all checked categories have valid names, and
   2879     # (3) unit test that all categories are getting unit tested.
   2880     #
   2881     categories = set([
   2882         'build/class',
   2883         'build/deprecated',
   2884         'build/endif_comment',
   2885         'build/forward_decl',
   2886         'build/header_guard',
   2887         'build/include',
   2888         'build/include_order',
   2889         'build/include_what_you_use',
   2890         'build/namespaces',
   2891         'build/printf_format',
   2892         'build/storage_class',
   2893         'build/using_std',
   2894         'legal/copyright',
   2895         'readability/braces',
   2896         'readability/casting',
   2897         'readability/check',
   2898         'readability/comparison_to_zero',
   2899         'readability/constructors',
   2900         'readability/control_flow',
   2901         'readability/fn_size',
   2902         'readability/function',
   2903         'readability/multiline_comment',
   2904         'readability/multiline_string',
   2905         'readability/naming',
   2906         'readability/null',
   2907         'readability/streams',
   2908         'readability/todo',
   2909         'readability/utf8',
   2910         'runtime/arrays',
   2911         'runtime/casting',
   2912         'runtime/explicit',
   2913         'runtime/init',
   2914         'runtime/int',
   2915         'runtime/invalid_increment',
   2916         'runtime/max_min_macros',
   2917         'runtime/memset',
   2918         'runtime/printf',
   2919         'runtime/printf_format',
   2920         'runtime/references',
   2921         'runtime/rtti',
   2922         'runtime/sizeof',
   2923         'runtime/string',
   2924         'runtime/threadsafe_fn',
   2925         'runtime/virtual',
   2926         'whitespace/blank_line',
   2927         'whitespace/braces',
   2928         'whitespace/comma',
   2929         'whitespace/comments',
   2930         'whitespace/declaration',
   2931         'whitespace/end_of_line',
   2932         'whitespace/ending_newline',
   2933         'whitespace/indent',
   2934         'whitespace/labels',
   2935         'whitespace/line_length',
   2936         'whitespace/newline',
   2937         'whitespace/operators',
   2938         'whitespace/parens',
   2939         'whitespace/semicolon',
   2940         'whitespace/tab',
   2941         'whitespace/todo',
   2942         ])
   2943 
   2944     def __init__(self, file_path, file_extension, handle_style_error, verbosity):
   2945         """Create a CppProcessor instance.
   2946 
   2947         Args:
   2948           file_extension: A string that is the file extension, without
   2949                           the leading dot.
   2950 
   2951         """
   2952         self.file_extension = file_extension
   2953         self.file_path = file_path
   2954         self.handle_style_error = handle_style_error
   2955         self.verbosity = verbosity
   2956 
   2957     # Useful for unit testing.
   2958     def __eq__(self, other):
   2959         """Return whether this CppProcessor instance is equal to another."""
   2960         if self.file_extension != other.file_extension:
   2961             return False
   2962         if self.file_path != other.file_path:
   2963             return False
   2964         if self.handle_style_error != other.handle_style_error:
   2965             return False
   2966         if self.verbosity != other.verbosity:
   2967             return False
   2968 
   2969         return True
   2970 
   2971     # Useful for unit testing.
   2972     def __ne__(self, other):
   2973         # Python does not automatically deduce __ne__() from __eq__().
   2974         return not self.__eq__(other)
   2975 
   2976     def process(self, lines):
   2977         _process_lines(self.file_path, self.file_extension, lines,
   2978                        self.handle_style_error, self.verbosity)
   2979 
   2980 
   2981 # FIXME: Remove this function (requires refactoring unit tests).
   2982 def process_file_data(filename, file_extension, lines, error, verbosity):
   2983     processor = CppProcessor(filename, file_extension, error, verbosity)
   2984     processor.process(lines)
   2985 
   2986