Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python
      2 #
      3 # Copyright (c) 2009 Google Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #    * Redistributions of source code must retain the above copyright
     10 # notice, this list of conditions and the following disclaimer.
     11 #    * Redistributions in binary form must reproduce the above
     12 # copyright notice, this list of conditions and the following disclaimer
     13 # in the documentation and/or other materials provided with the
     14 # distribution.
     15 #    * Neither the name of Google Inc. nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 """Does google-lint on c++ files.
     32 
     33 The goal of this script is to identify places in the code that *may*
     34 be in non-compliance with google style.  It does not attempt to fix
     35 up these problems -- the point is to educate.  It does also not
     36 attempt to find all problems, or to ensure that everything it does
     37 find is legitimately a problem.
     38 
     39 In particular, we can get very confused by /* and // inside strings!
     40 We do a small hack, which is to ignore //'s with "'s after them on the
     41 same line, but it is far from perfect (in either direction).
     42 """
     43 
     44 import codecs
     45 import copy
     46 import getopt
     47 import math  # for log
     48 import os
     49 import re
     50 import sre_compile
     51 import string
     52 import sys
     53 import unicodedata
     54 
     55 
     56 _USAGE = """
     57 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
     58                    [--counting=total|toplevel|detailed] [--root=subdir]
     59                    [--linelength=digits]
     60         <file> [file] ...
     61 
     62   The style guidelines this tries to follow are those in
     63     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
     64 
     65   Every problem is given a confidence score from 1-5, with 5 meaning we are
     66   certain of the problem, and 1 meaning it could be a legitimate construct.
     67   This will miss some errors, and is not a substitute for a code review.
     68 
     69   To suppress false-positive errors of a certain category, add a
     70   'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
     71   suppresses errors of all categories on that line.
     72 
     73   The files passed in will be linted; at least one file must be provided.
     74   Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
     75   extensions with the --extensions flag.
     76 
     77   Flags:
     78 
     79     output=vs7
     80       By default, the output is formatted to ease emacs parsing.  Visual Studio
     81       compatible output (vs7) may also be used.  Other formats are unsupported.
     82 
     83     verbose=#
     84       Specify a number 0-5 to restrict errors to certain verbosity levels.
     85 
     86     filter=-x,+y,...
     87       Specify a comma-separated list of category-filters to apply: only
     88       error messages whose category names pass the filters will be printed.
     89       (Category names are printed with the message and look like
     90       "[whitespace/indent]".)  Filters are evaluated left to right.
     91       "-FOO" and "FOO" means "do not print categories that start with FOO".
     92       "+FOO" means "do print categories that start with FOO".
     93 
     94       Examples: --filter=-whitespace,+whitespace/braces
     95                 --filter=whitespace,runtime/printf,+runtime/printf_format
     96                 --filter=-,+build/include_what_you_use
     97 
     98       To see a list of all the categories used in cpplint, pass no arg:
     99          --filter=
    100 
    101     counting=total|toplevel|detailed
    102       The total number of errors found is always printed. If
    103       'toplevel' is provided, then the count of errors in each of
    104       the top-level categories like 'build' and 'whitespace' will
    105       also be printed. If 'detailed' is provided, then a count
    106       is provided for each category like 'build/class'.
    107 
    108     root=subdir
    109       The root directory used for deriving header guard CPP variable.
    110       By default, the header guard CPP variable is calculated as the relative
    111       path to the directory that contains .git, .hg, or .svn.  When this flag
    112       is specified, the relative path is calculated from the specified
    113       directory. If the specified directory does not exist, this flag is
    114       ignored.
    115 
    116       Examples:
    117         Assuing that src/.git exists, the header guard CPP variables for
    118         src/chrome/browser/ui/browser.h are:
    119 
    120         No flag => CHROME_BROWSER_UI_BROWSER_H_
    121         --root=chrome => BROWSER_UI_BROWSER_H_
    122         --root=chrome/browser => UI_BROWSER_H_
    123 
    124     linelength=digits
    125       This is the allowed line length for the project. The default value is
    126       80 characters.
    127 
    128       Examples:
    129         --linelength=120
    130 
    131     extensions=extension,extension,...
    132       The allowed file extensions that cpplint will check
    133 
    134       Examples:
    135         --extensions=hpp,cpp
    136 """
    137 
# We categorize each error message we print.  Here are the categories.
# We want an explicit list so we can list them all in cpplint --filter=.
# If you add a new error message with a new category, add it to the list
# here!  cpplint_unittest.py should tell you if you forget to do this.
#
# Each name has the form 'toplevel/detail'.  ParseNolintSuppressions() only
# accepts names from this list inside NOLINT(...), and the 'toplevel' counting
# mode groups errors by the part of the name before the '/'.
_ERROR_CATEGORIES = [
  'build/class',
  'build/deprecated',
  'build/endif_comment',
  'build/explicit_make_pair',
  'build/forward_decl',
  'build/header_guard',
  'build/include',
  'build/include_alpha',
  'build/include_order',
  'build/include_what_you_use',
  'build/namespaces',
  'build/printf_format',
  'build/storage_class',
  'legal/copyright',
  'readability/alt_tokens',
  'readability/braces',
  'readability/casting',
  'readability/check',
  'readability/constructors',
  'readability/fn_size',
  'readability/function',
  'readability/multiline_comment',
  'readability/multiline_string',
  'readability/namespace',
  'readability/nolint',
  'readability/nul',
  'readability/streams',
  'readability/todo',
  'readability/utf8',
  'runtime/arrays',
  'runtime/casting',
  'runtime/explicit',
  'runtime/int',
  'runtime/init',
  'runtime/invalid_increment',
  'runtime/member_string_references',
  'runtime/memset',
  'runtime/operator',
  'runtime/printf',
  'runtime/printf_format',
  'runtime/references',
  'runtime/sizeof',
  'runtime/string',
  'runtime/threadsafe_fn',
  'runtime/vlog',
  'whitespace/blank_line',
  'whitespace/braces',
  'whitespace/comma',
  'whitespace/comments',
  'whitespace/empty_conditional_body',
  'whitespace/empty_loop_body',
  'whitespace/end_of_line',
  'whitespace/ending_newline',
  'whitespace/forcolon',
  'whitespace/indent',
  'whitespace/line_length',
  'whitespace/newline',
  'whitespace/operators',
  'whitespace/parens',
  'whitespace/semicolon',
  'whitespace/tab',
  'whitespace/todo'
  ]
    206 
# The default state of the category filter. This is overridden by the --filter=
# flag: _CppLintState.SetFilters() appends the flag's filters after these
# defaults. By default all errors are on, so only add here categories that
# should be off by default (i.e., categories that must be enabled by the
# --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
    212 
    213 # We used to check for high-bit characters, but after much discussion we
    214 # decided those were OK, as long as they were in UTF-8 and didn't represent
    215 # hard-coded international strings, which belong in a separate i18n file.
    216 
    217 
    218 # C++ headers
    219 _CPP_HEADERS = frozenset([
    220     # Legacy
    221     'algobase.h',
    222     'algo.h',
    223     'alloc.h',
    224     'builtinbuf.h',
    225     'bvector.h',
    226     'complex.h',
    227     'defalloc.h',
    228     'deque.h',
    229     'editbuf.h',
    230     'fstream.h',
    231     'function.h',
    232     'hash_map',
    233     'hash_map.h',
    234     'hash_set',
    235     'hash_set.h',
    236     'hashtable.h',
    237     'heap.h',
    238     'indstream.h',
    239     'iomanip.h',
    240     'iostream.h',
    241     'istream.h',
    242     'iterator.h',
    243     'list.h',
    244     'map.h',
    245     'multimap.h',
    246     'multiset.h',
    247     'ostream.h',
    248     'pair.h',
    249     'parsestream.h',
    250     'pfstream.h',
    251     'procbuf.h',
    252     'pthread_alloc',
    253     'pthread_alloc.h',
    254     'rope',
    255     'rope.h',
    256     'ropeimpl.h',
    257     'set.h',
    258     'slist',
    259     'slist.h',
    260     'stack.h',
    261     'stdiostream.h',
    262     'stl_alloc.h',
    263     'stl_relops.h',
    264     'streambuf.h',
    265     'stream.h',
    266     'strfile.h',
    267     'strstream.h',
    268     'tempbuf.h',
    269     'tree.h',
    270     'type_traits.h',
    271     'vector.h',
    272     # 17.6.1.2 C++ library headers
    273     'algorithm',
    274     'array',
    275     'atomic',
    276     'bitset',
    277     'chrono',
    278     'codecvt',
    279     'complex',
    280     'condition_variable',
    281     'deque',
    282     'exception',
    283     'forward_list',
    284     'fstream',
    285     'functional',
    286     'future',
    287     'initializer_list',
    288     'iomanip',
    289     'ios',
    290     'iosfwd',
    291     'iostream',
    292     'istream',
    293     'iterator',
    294     'limits',
    295     'list',
    296     'locale',
    297     'map',
    298     'memory',
    299     'mutex',
    300     'new',
    301     'numeric',
    302     'ostream',
    303     'queue',
    304     'random',
    305     'ratio',
    306     'regex',
    307     'set',
    308     'sstream',
    309     'stack',
    310     'stdexcept',
    311     'streambuf',
    312     'string',
    313     'strstream',
    314     'system_error',
    315     'thread',
    316     'tuple',
    317     'typeindex',
    318     'typeinfo',
    319     'type_traits',
    320     'unordered_map',
    321     'unordered_set',
    322     'utility',
    323     'valarray',
    324     'vector',
    325     # 17.6.1.2 C++ headers for C library facilities
    326     'cassert',
    327     'ccomplex',
    328     'cctype',
    329     'cerrno',
    330     'cfenv',
    331     'cfloat',
    332     'cinttypes',
    333     'ciso646',
    334     'climits',
    335     'clocale',
    336     'cmath',
    337     'csetjmp',
    338     'csignal',
    339     'cstdalign',
    340     'cstdarg',
    341     'cstdbool',
    342     'cstddef',
    343     'cstdint',
    344     'cstdio',
    345     'cstdlib',
    346     'cstring',
    347     'ctgmath',
    348     'ctime',
    349     'cuchar',
    350     'cwchar',
    351     'cwctype',
    352     ])
    353 
    354 # Assertion macros.  These are defined in base/logging.h and
    355 # testing/base/gunit.h.  Note that the _M versions need to come first
    356 # for substring matching to work.
    357 _CHECK_MACROS = [
    358     'DCHECK', 'CHECK',
    359     'EXPECT_TRUE_M', 'EXPECT_TRUE',
    360     'ASSERT_TRUE_M', 'ASSERT_TRUE',
    361     'EXPECT_FALSE_M', 'EXPECT_FALSE',
    362     'ASSERT_FALSE_M', 'ASSERT_FALSE',
    363     ]
    364 
    365 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
    366 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
    367 
    368 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
    369                         ('>=', 'GE'), ('>', 'GT'),
    370                         ('<=', 'LE'), ('<', 'LT')]:
    371   _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    372   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    373   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    374   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    375   _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    376   _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    377 
    378 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    379                             ('>=', 'LT'), ('>', 'LE'),
    380                             ('<=', 'GT'), ('<', 'GE')]:
    381   _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    382   _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    383   _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    384   _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    385 
    386 # Alternative tokens and their replacements.  For full list, see section 2.5
    387 # Alternative tokens [lex.digraph] in the C++ standard.
    388 #
    389 # Digraphs (such as '%:') are not included here since it's a mess to
    390 # match those on a word boundary.
    391 _ALT_TOKEN_REPLACEMENT = {
    392     'and': '&&',
    393     'bitor': '|',
    394     'or': '||',
    395     'xor': '^',
    396     'compl': '~',
    397     'bitand': '&',
    398     'and_eq': '&=',
    399     'or_eq': '|=',
    400     'xor_eq': '^=',
    401     'not': '!',
    402     'not_eq': '!='
    403     }
    404 
    405 # Compile regular expression that matches all the above keywords.  The "[ =()]"
    406 # bit is meant to avoid matching these keywords outside of boolean expressions.
    407 #
    408 # False positives include C-style multi-line comments and multi-line strings
    409 # but those have always been troublesome for cpplint.
    410 _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    411     r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
    412 
    413 
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().  The descriptions match
# _IncludeState._TYPE_NAMES.
_C_SYS_HEADER = 1  # C system header
_CPP_SYS_HEADER = 2  # C++ system header
_LIKELY_MY_HEADER = 3  # header this file implements
_POSSIBLE_MY_HEADER = 4  # header this file may implement
_OTHER_HEADER = 5  # other header
    421 
# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks: optional leading whitespace, one of the
# keywords asm/_asm/__asm/__asm__, an optional volatile/__volatile__
# qualifier (captured as group 1), then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
    432 
    433 
# Cache of compiled regular expressions, keyed by pattern string.  It is
# filled lazily and shared by Match(), ReplaceAll() and Search().
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).  Group 1 is the parenthesized
# part, including the parentheses, or None when there is none.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which all categories are suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None

# The allowed line length of files.
# This is set by --linelength flag.
_line_length = 80

# The allowed extensions for file names (stored without the leading dot).
# This is set by --extensions flag.
_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
    454 
    455 def ParseNolintSuppressions(filename, raw_line, linenum, error):
    456   """Updates the global list of error-suppressions.
    457 
    458   Parses any NOLINT comments on the current line, updating the global
    459   error_suppressions store.  Reports an error if the NOLINT comment
    460   was malformed.
    461 
    462   Args:
    463     filename: str, the name of the input file.
    464     raw_line: str, the line of input text, with comments.
    465     linenum: int, the number of the current line.
    466     error: function, an error handler.
    467   """
    468   # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
    469   matched = _RE_SUPPRESSION.search(raw_line)
    470   if matched:
    471     category = matched.group(1)
    472     if category in (None, '(*)'):  # => "suppress all"
    473       _error_suppressions.setdefault(None, set()).add(linenum)
    474     else:
    475       if category.startswith('(') and category.endswith(')'):
    476         category = category[1:-1]
    477         if category in _ERROR_CATEGORIES:
    478           _error_suppressions.setdefault(category, set()).add(linenum)
    479         else:
    480           error(filename, linenum, 'readability/nolint', 5,
    481                 'Unknown NOLINT error category: %s' % category)
    482 
    483 
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the global _error_suppressions map in place, so every previously
  recorded line (for any category) is forgotten.
  """
  _error_suppressions.clear()
    487 
    488 
    489 def IsErrorSuppressedByNolint(category, linenum):
    490   """Returns true if the specified error category is suppressed on this line.
    491 
    492   Consults the global error_suppressions map populated by
    493   ParseNolintSuppressions/ResetNolintSuppressions.
    494 
    495   Args:
    496     category: str, the category of the error.
    497     linenum: int, the current line number.
    498   Returns:
    499     bool, True iff the error should be suppressed due to a NOLINT comment.
    500   """
    501   return (linenum in _error_suppressions.get(category, set()) or
    502           linenum in _error_suppressions.get(None, set()))
    503 
    504 def Match(pattern, s):
    505   """Matches the string with the pattern, caching the compiled regexp."""
    506   # The regexp compilation caching is inlined in both Match and Search for
    507   # performance reasons; factoring it out into a separate function turns out
    508   # to be noticeably expensive.
    509   if pattern not in _regexp_compile_cache:
    510     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    511   return _regexp_compile_cache[pattern].match(s)
    512 
    513 
    514 def ReplaceAll(pattern, rep, s):
    515   """Replaces instances of pattern in a string with a replacement.
    516 
    517   The compiled regex is kept in a cache shared by Match and Search.
    518 
    519   Args:
    520     pattern: regex pattern
    521     rep: replacement text
    522     s: search string
    523 
    524   Returns:
    525     string with replacements made (or original string if no replacements)
    526   """
    527   if pattern not in _regexp_compile_cache:
    528     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    529   return _regexp_compile_cache[pattern].sub(rep, s)
    530 
    531 
    532 def Search(pattern, s):
    533   """Searches the string for the pattern, caching the compiled regexp."""
    534   if pattern not in _regexp_compile_cache:
    535     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    536   return _regexp_compile_cache[pattern].search(s)
    537 
    538 
    539 class _IncludeState(dict):
    540   """Tracks line numbers for includes, and the order in which includes appear.
    541 
    542   As a dict, an _IncludeState object serves as a mapping between include
    543   filename and line number on which that file was included.
    544 
    545   Call CheckNextIncludeOrder() once for each header in the file, passing
    546   in the type constants defined above. Calls in an illegal order will
    547   raise an _IncludeError with an appropriate error message.
    548 
    549   """
    550   # self._section will move monotonically through this set. If it ever
    551   # needs to move backwards, CheckNextIncludeOrder will raise an error.
    552   _INITIAL_SECTION = 0
    553   _MY_H_SECTION = 1
    554   _C_SECTION = 2
    555   _CPP_SECTION = 3
    556   _OTHER_H_SECTION = 4
    557 
    558   _TYPE_NAMES = {
    559       _C_SYS_HEADER: 'C system header',
    560       _CPP_SYS_HEADER: 'C++ system header',
    561       _LIKELY_MY_HEADER: 'header this file implements',
    562       _POSSIBLE_MY_HEADER: 'header this file may implement',
    563       _OTHER_HEADER: 'other header',
    564       }
    565   _SECTION_NAMES = {
    566       _INITIAL_SECTION: "... nothing. (This can't be an error.)",
    567       _MY_H_SECTION: 'a header this file implements',
    568       _C_SECTION: 'C system header',
    569       _CPP_SECTION: 'C++ system header',
    570       _OTHER_H_SECTION: 'other header',
    571       }
    572 
    573   def __init__(self):
    574     dict.__init__(self)
    575     self.ResetSection()
    576 
    577   def ResetSection(self):
    578     # The name of the current section.
    579     self._section = self._INITIAL_SECTION
    580     # The path of last found header.
    581     self._last_header = ''
    582 
    583   def SetLastHeader(self, header_path):
    584     self._last_header = header_path
    585 
    586   def CanonicalizeAlphabeticalOrder(self, header_path):
    587     """Returns a path canonicalized for alphabetical comparison.
    588 
    589     - replaces "-" with "_" so they both cmp the same.
    590     - removes '-inl' since we don't require them to be after the main header.
    591     - lowercase everything, just in case.
    592 
    593     Args:
    594       header_path: Path to be canonicalized.
    595 
    596     Returns:
    597       Canonicalized path.
    598     """
    599     return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
    600 
    601   def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    602     """Check if a header is in alphabetical order with the previous header.
    603 
    604     Args:
    605       clean_lines: A CleansedLines instance containing the file.
    606       linenum: The number of the line to check.
    607       header_path: Canonicalized header to be checked.
    608 
    609     Returns:
    610       Returns true if the header is in alphabetical order.
    611     """
    612     # If previous section is different from current section, _last_header will
    613     # be reset to empty string, so it's always less than current header.
    614     #
    615     # If previous line was a blank line, assume that the headers are
    616     # intentionally sorted the way they are.
    617     if (self._last_header > header_path and
    618         not Match(r'^\s*$', clean_lines.elided[linenum - 1])):
    619       return False
    620     return True
    621 
    622   def CheckNextIncludeOrder(self, header_type):
    623     """Returns a non-empty error message if the next header is out of order.
    624 
    625     This function also updates the internal state to be ready to check
    626     the next include.
    627 
    628     Args:
    629       header_type: One of the _XXX_HEADER constants defined above.
    630 
    631     Returns:
    632       The empty string if the header is in the right order, or an
    633       error message describing what's wrong.
    634 
    635     """
    636     error_message = ('Found %s after %s' %
    637                      (self._TYPE_NAMES[header_type],
    638                       self._SECTION_NAMES[self._section]))
    639 
    640     last_section = self._section
    641 
    642     if header_type == _C_SYS_HEADER:
    643       if self._section <= self._C_SECTION:
    644         self._section = self._C_SECTION
    645       else:
    646         self._last_header = ''
    647         return error_message
    648     elif header_type == _CPP_SYS_HEADER:
    649       if self._section <= self._CPP_SECTION:
    650         self._section = self._CPP_SECTION
    651       else:
    652         self._last_header = ''
    653         return error_message
    654     elif header_type == _LIKELY_MY_HEADER:
    655       if self._section <= self._MY_H_SECTION:
    656         self._section = self._MY_H_SECTION
    657       else:
    658         self._section = self._OTHER_H_SECTION
    659     elif header_type == _POSSIBLE_MY_HEADER:
    660       if self._section <= self._MY_H_SECTION:
    661         self._section = self._MY_H_SECTION
    662       else:
    663         # This will always be the fallback because we're not sure
    664         # enough that the header is associated with this file.
    665         self._section = self._OTHER_H_SECTION
    666     else:
    667       assert header_type == _OTHER_HEADER
    668       self._section = self._OTHER_H_SECTION
    669 
    670     if last_section != self._section:
    671       self._last_header = ''
    672 
    673     return ''
    674 
    675 
    676 class _CppLintState(object):
    677   """Maintains module-wide state.."""
    678 
    679   def __init__(self):
    680     self.verbose_level = 1  # global setting.
    681     self.error_count = 0    # global count of reported errors
    682     # filters to apply when emitting error messages
    683     self.filters = _DEFAULT_FILTERS[:]
    684     self.counting = 'total'  # In what way are we counting errors?
    685     self.errors_by_category = {}  # string to int dict storing error counts
    686 
    687     # output format:
    688     # "emacs" - format that emacs can parse (default)
    689     # "vs7" - format that Microsoft Visual Studio 7 can parse
    690     self.output_format = 'emacs'
    691 
    692   def SetOutputFormat(self, output_format):
    693     """Sets the output format for errors."""
    694     self.output_format = output_format
    695 
    696   def SetVerboseLevel(self, level):
    697     """Sets the module's verbosity, and returns the previous setting."""
    698     last_verbose_level = self.verbose_level
    699     self.verbose_level = level
    700     return last_verbose_level
    701 
    702   def SetCountingStyle(self, counting_style):
    703     """Sets the module's counting options."""
    704     self.counting = counting_style
    705 
    706   def SetFilters(self, filters):
    707     """Sets the error-message filters.
    708 
    709     These filters are applied when deciding whether to emit a given
    710     error message.
    711 
    712     Args:
    713       filters: A string of comma-separated filters (eg "+whitespace/indent").
    714                Each filter should start with + or -; else we die.
    715 
    716     Raises:
    717       ValueError: The comma-separated filters did not all start with '+' or '-'.
    718                   E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    719     """
    720     # Default filters always have less priority than the flag ones.
    721     self.filters = _DEFAULT_FILTERS[:]
    722     for filt in filters.split(','):
    723       clean_filt = filt.strip()
    724       if clean_filt:
    725         self.filters.append(clean_filt)
    726     for filt in self.filters:
    727       if not (filt.startswith('+') or filt.startswith('-')):
    728         raise ValueError('Every filter in --filters must start with + or -'
    729                          ' (%s does not)' % filt)
    730 
    731   def ResetErrorCounts(self):
    732     """Sets the module's error statistic back to zero."""
    733     self.error_count = 0
    734     self.errors_by_category = {}
    735 
    736   def IncrementErrorCount(self, category):
    737     """Bumps the module's error statistic."""
    738     self.error_count += 1
    739     if self.counting in ('toplevel', 'detailed'):
    740       if self.counting != 'detailed':
    741         category = category.split('/')[0]
    742       if category not in self.errors_by_category:
    743         self.errors_by_category[category] = 0
    744       self.errors_by_category[category] += 1
    745 
    746   def PrintErrorCounts(self):
    747     """Print a summary of errors by category, and the total."""
    748     for category, count in self.errors_by_category.iteritems():
    749       sys.stderr.write('Category \'%s\' errors found: %d\n' %
    750                        (category, count))
    751     sys.stderr.write('Total errors found: %d\n' % self.error_count)
    752 
# The single module-wide _CppLintState instance; the module-level helper
# functions read and update the lint settings through it.
_cpplint_state = _CppLintState()
    754 
    755 
def _OutputFormat():
  """Gets the module's output format, as stored on _cpplint_state."""
  return _cpplint_state.output_format
    759 
    760 
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new format, passed on to
      _cpplint_state.SetOutputFormat() unchanged.
  """
  _cpplint_state.SetOutputFormat(output_format)
    764 
    765 
def _VerboseLevel():
  """Returns the module's verbosity setting, as stored on _cpplint_state."""
  return _cpplint_state.verbose_level
    769 
    770 
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level.

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
    774 
    775 
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style, passed on to
      _cpplint_state.SetCountingStyle() unchanged.
  """
  _cpplint_state.SetCountingStyle(level)
    779 
    780 
def _Filters():
  """Returns the module's list of output filters, as a list.

  This is the list object held by _cpplint_state itself, not a copy.
  """
  return _cpplint_state.filters
    784 
    785 
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: Propagated from _CppLintState.SetFilters() when a filter
                does not start with '+' or '-'.
  """
  _cpplint_state.SetFilters(filters)
    797 
    798 
    799 class _FunctionState(object):
    800   """Tracks current function name and the number of lines in its body."""
    801 
    802   _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    803   _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    804 
    805   def __init__(self):
    806     self.in_a_function = False
    807     self.lines_in_function = 0
    808     self.current_function = ''
    809 
    810   def Begin(self, function_name):
    811     """Start analyzing function body.
    812 
    813     Args:
    814       function_name: The name of the function being tracked.
    815     """
    816     self.in_a_function = True
    817     self.lines_in_function = 0
    818     self.current_function = function_name
    819 
    820   def Count(self):
    821     """Count line in current function body."""
    822     if self.in_a_function:
    823       self.lines_in_function += 1
    824 
    825   def Check(self, error, filename, linenum):
    826     """Report if too many lines in function body.
    827 
    828     Args:
    829       error: The function to call with any errors found.
    830       filename: The name of the current file.
    831       linenum: The number of the line to check.
    832     """
    833     if Match(r'T(EST|est)', self.current_function):
    834       base_trigger = self._TEST_TRIGGER
    835     else:
    836       base_trigger = self._NORMAL_TRIGGER
    837     trigger = base_trigger * 2**_VerboseLevel()
    838 
    839     if self.lines_in_function > trigger:
    840       error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    841       # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    842       if error_level > 5:
    843         error_level = 5
    844       error(filename, linenum, 'readability/fn_size', error_level,
    845             'Small and focused functions are preferred:'
    846             ' %s has %d non-comment lines'
    847             ' (error triggered by exceeding %d lines).'  % (
    848                 self.current_function, self.lines_in_function, trigger))
    849 
    850   def End(self):
    851     """Stop analyzing function body."""
    852     self.in_a_function = False
    853 
    854 
    855 class _IncludeError(Exception):
    856   """Indicates a problem with the include order in a file."""
    857   pass
    858 
    859 
    860 class FileInfo:
    861   """Provides utility functions for filenames.
    862 
    863   FileInfo provides easy access to the components of a file's path
    864   relative to the project root.
    865   """
    866 
    867   def __init__(self, filename):
    868     self._filename = filename
    869 
    870   def FullName(self):
    871     """Make Windows paths like Unix."""
    872     return os.path.abspath(self._filename).replace('\\', '/')
    873 
    874   def RepositoryName(self):
    875     """FullName after removing the local path to the repository.
    876 
    877     If we have a real absolute path name here we can try to do something smart:
    878     detecting the root of the checkout and truncating /path/to/checkout from
    879     the name so that we get header guards that don't include things like
    880     "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    881     people on different computers who have checked the source out to different
    882     locations won't see bogus errors.
    883     """
    884     fullname = self.FullName()
    885 
    886     if os.path.exists(fullname):
    887       project_dir = os.path.dirname(fullname)
    888 
    889       if os.path.exists(os.path.join(project_dir, ".svn")):
    890         # If there's a .svn file in the current directory, we recursively look
    891         # up the directory tree for the top of the SVN checkout
    892         root_dir = project_dir
    893         one_up_dir = os.path.dirname(root_dir)
    894         while os.path.exists(os.path.join(one_up_dir, ".svn")):
    895           root_dir = os.path.dirname(root_dir)
    896           one_up_dir = os.path.dirname(one_up_dir)
    897 
    898         prefix = os.path.commonprefix([root_dir, project_dir])
    899         return fullname[len(prefix) + 1:]
    900 
    901       # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
    902       # searching up from the current path.
    903       root_dir = os.path.dirname(fullname)
    904       while (root_dir != os.path.dirname(root_dir) and
    905              not os.path.exists(os.path.join(root_dir, ".git")) and
    906              not os.path.exists(os.path.join(root_dir, ".hg")) and
    907              not os.path.exists(os.path.join(root_dir, ".svn"))):
    908         root_dir = os.path.dirname(root_dir)
    909 
    910       if (os.path.exists(os.path.join(root_dir, ".git")) or
    911           os.path.exists(os.path.join(root_dir, ".hg")) or
    912           os.path.exists(os.path.join(root_dir, ".svn"))):
    913         prefix = os.path.commonprefix([root_dir, project_dir])
    914         return fullname[len(prefix) + 1:]
    915 
    916     # Don't know what to do; header guard warnings may be wrong...
    917     return fullname
    918 
    919   def Split(self):
    920     """Splits the file into the directory, basename, and extension.
    921 
    922     For 'chrome/browser/browser.cc', Split() would
    923     return ('chrome/browser', 'browser', '.cc')
    924 
    925     Returns:
    926       A tuple of (directory, basename, extension).
    927     """
    928 
    929     googlename = self.RepositoryName()
    930     project, rest = os.path.split(googlename)
    931     return (project,) + os.path.splitext(rest)
    932 
    933   def BaseName(self):
    934     """File base name - text after the final slash, before the final period."""
    935     return self.Split()[1]
    936 
    937   def Extension(self):
    938     """File extension - text following the final period."""
    939     return self.Split()[2]
    940 
    941   def NoExtension(self):
    942     """File has no source file extension."""
    943     return '/'.join(self.Split()[0:2])
    944 
    945   def IsSource(self):
    946     """File has a source file extension."""
    947     return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    948 
    949 
    950 def _ShouldPrintError(category, confidence, linenum):
    951   """If confidence >= verbose, category passes filter and is not suppressed."""
    952 
    953   # There are three ways we might decide not to print an error message:
    954   # a "NOLINT(category)" comment appears in the source,
    955   # the verbosity level isn't high enough, or the filters filter it out.
    956   if IsErrorSuppressedByNolint(category, linenum):
    957     return False
    958   if confidence < _cpplint_state.verbose_level:
    959     return False
    960 
    961   is_filtered = False
    962   for one_filter in _Filters():
    963     if one_filter.startswith('-'):
    964       if category.startswith(one_filter[1:]):
    965         is_filtered = True
    966     elif one_filter.startswith('+'):
    967       if category.startswith(one_filter[1:]):
    968         is_filtered = False
    969     else:
    970       assert False  # should have been checked for in SetFilter.
    971   if is_filtered:
    972     return False
    973 
    974   return True
    975 
    976 
    977 def Error(filename, linenum, category, confidence, message):
    978   """Logs the fact we've found a lint error.
    979 
    980   We log where the error was found, and also our confidence in the error,
    981   that is, how certain we are this is a legitimate style regression, and
    982   not a misidentification or a use that's sometimes justified.
    983 
    984   False positives can be suppressed by the use of
    985   "cpplint(category)"  comments on the offending line.  These are
    986   parsed into _error_suppressions.
    987 
    988   Args:
    989     filename: The name of the file containing the error.
    990     linenum: The number of the line containing the error.
    991     category: A string used to describe the "category" this bug
    992       falls under: "whitespace", say, or "runtime".  Categories
    993       may have a hierarchy separated by slashes: "whitespace/indent".
    994     confidence: A number from 1-5 representing a confidence score for
    995       the error, with 5 meaning that we are certain of the problem,
    996       and 1 meaning that it could be a legitimate construct.
    997     message: The error message.
    998   """
    999   if _ShouldPrintError(category, confidence, linenum):
   1000     _cpplint_state.IncrementErrorCount(category)
   1001     if _cpplint_state.output_format == 'vs7':
   1002       sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' % (
   1003           filename, linenum, message, category, confidence))
   1004     elif _cpplint_state.output_format == 'eclipse':
   1005       sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' % (
   1006           filename, linenum, message, category, confidence))
   1007     else:
   1008       sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' % (
   1009           filename, linenum, message, category, confidence))
   1010 
   1011 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard:
# a backslash followed by a simple escape character, a run of digits, or
# 'x' plus hex digits.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches string literals.  Escape sequences should already have been
# removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches single-character literals.  Escape sequences should already have
# been removed with _RE_PATTERN_CLEANSE_LINE_ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# The pattern is more involved than one might expect because it also decides
# how much surrounding whitespace is removed together with the comment, so
# that comments embedded in statements are handled better.
# The alternatives are tried in this order:
#   1. comment at the end of the line: whitespace on both sides is removed;
#   2. comment followed by whitespace: the whitespace on the right is removed;
#   3. comment preceded by whitespace and followed by a non-word character:
#      the whitespace on the left is removed;
#   4. otherwise only the comment itself is removed.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
   1032 
   1033 
   1034 def IsCppString(line):
   1035   """Does line terminate so, that the next symbol is in string constant.
   1036 
   1037   This function does not consider single-line nor multi-line comments.
   1038 
   1039   Args:
   1040     line: is a partial line of code starting from the 0..n.
   1041 
   1042   Returns:
   1043     True, if next character appended to 'line' is inside a
   1044     string constant.
   1045   """
   1046 
   1047   line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
   1048   return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
   1049 
   1050 
   1051 def CleanseRawStrings(raw_lines):
   1052   """Removes C++11 raw strings from lines.
   1053 
   1054     Before:
   1055       static const char kData[] = R"(
   1056           multi-line string
   1057           )";
   1058 
   1059     After:
   1060       static const char kData[] = ""
   1061           (replaced by blank line)
   1062           "";
   1063 
   1064   Args:
   1065     raw_lines: list of raw lines.
   1066 
   1067   Returns:
   1068     list of lines with C++11 raw strings replaced by empty strings.
   1069   """
   1070 
   1071   delimiter = None
   1072   lines_without_raw_strings = []
   1073   for line in raw_lines:
   1074     if delimiter:
   1075       # Inside a raw string, look for the end
   1076       end = line.find(delimiter)
   1077       if end >= 0:
   1078         # Found the end of the string, match leading space for this
   1079         # line and resume copying the original lines, and also insert
   1080         # a "" on the last line.
   1081         leading_space = Match(r'^(\s*)\S', line)
   1082         line = leading_space.group(1) + '""' + line[end + len(delimiter):]
   1083         delimiter = None
   1084       else:
   1085         # Haven't found the end yet, append a blank line.
   1086         line = ''
   1087 
   1088     else:
   1089       # Look for beginning of a raw string.
   1090       # See 2.14.15 [lex.string] for syntax.
   1091       matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
   1092       if matched:
   1093         delimiter = ')' + matched.group(2) + '"'
   1094 
   1095         end = matched.group(3).find(delimiter)
   1096         if end >= 0:
   1097           # Raw string ended on same line
   1098           line = (matched.group(1) + '""' +
   1099                   matched.group(3)[end + len(delimiter):])
   1100           delimiter = None
   1101         else:
   1102           # Start of a multi-line raw string
   1103           line = matched.group(1) + '""'
   1104 
   1105     lines_without_raw_strings.append(line)
   1106 
   1107   # TODO(unknown): if delimiter is not None here, we might want to
   1108   # emit a warning for unterminated string.
   1109   return lines_without_raw_strings
   1110 
   1111 
   1112 def FindNextMultiLineCommentStart(lines, lineix):
   1113   """Find the beginning marker for a multiline comment."""
   1114   while lineix < len(lines):
   1115     if lines[lineix].strip().startswith('/*'):
   1116       # Only return this marker if the comment goes beyond this line
   1117       if lines[lineix].strip().find('*/', 2) < 0:
   1118         return lineix
   1119     lineix += 1
   1120   return len(lines)
   1121 
   1122 
   1123 def FindNextMultiLineCommentEnd(lines, lineix):
   1124   """We are inside a comment, find the end marker."""
   1125   while lineix < len(lines):
   1126     if lines[lineix].strip().endswith('*/'):
   1127       return lineix
   1128     lineix += 1
   1129   return len(lines)
   1130 
   1131 
   1132 def RemoveMultiLineCommentsFromRange(lines, begin, end):
   1133   """Clears a range of lines for multi-line comments."""
   1134   # Having // dummy comments makes the lines non-empty, so we will not get
   1135   # unnecessary blank line warnings later in the code.
   1136   for i in range(begin, end):
   1137     lines[i] = '// dummy'
   1138 
   1139 
   1140 def RemoveMultiLineComments(filename, lines, error):
   1141   """Removes multiline (c-style) comments from lines."""
   1142   lineix = 0
   1143   while lineix < len(lines):
   1144     lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
   1145     if lineix_begin >= len(lines):
   1146       return
   1147     lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
   1148     if lineix_end >= len(lines):
   1149       error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
   1150             'Could not find end of multi-line comment')
   1151       return
   1152     RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
   1153     lineix = lineix_end + 1
   1154 
   1155 
   1156 def CleanseComments(line):
   1157   """Removes //-comments and single-line C-style /* */ comments.
   1158 
   1159   Args:
   1160     line: A line of C++ source.
   1161 
   1162   Returns:
   1163     The line with single-line comments removed.
   1164   """
   1165   commentpos = line.find('//')
   1166   if commentpos != -1 and not IsCppString(line[:commentpos]):
   1167     line = line[:commentpos].rstrip()
   1168   # get rid of /* ... */
   1169   return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
   1170 
   1171 
   1172 class CleansedLines(object):
   1173   """Holds 3 copies of all lines with different preprocessing applied to them.
   1174 
   1175   1) elided member contains lines without strings and comments,
   1176   2) lines member contains lines without comments, and
   1177   3) raw_lines member contains all the lines without processing.
   1178   All these three members are of <type 'list'>, and of the same length.
   1179   """
   1180 
   1181   def __init__(self, lines):
   1182     self.elided = []
   1183     self.lines = []
   1184     self.raw_lines = lines
   1185     self.num_lines = len(lines)
   1186     self.lines_without_raw_strings = CleanseRawStrings(lines)
   1187     for linenum in range(len(self.lines_without_raw_strings)):
   1188       self.lines.append(CleanseComments(
   1189           self.lines_without_raw_strings[linenum]))
   1190       elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
   1191       self.elided.append(CleanseComments(elided))
   1192 
   1193   def NumLines(self):
   1194     """Returns the number of lines represented."""
   1195     return self.num_lines
   1196 
   1197   @staticmethod
   1198   def _CollapseStrings(elided):
   1199     """Collapses strings and chars on a line to simple "" or '' blocks.
   1200 
   1201     We nix strings first so we're not fooled by text like '"http://"'
   1202 
   1203     Args:
   1204       elided: The line being processed.
   1205 
   1206     Returns:
   1207       The line with collapsed strings.
   1208     """
   1209     if not _RE_PATTERN_INCLUDE.match(elided):
   1210       # Remove escaped characters first to make quote/single quote collapsing
   1211       # basic.  Things that look like escaped characters shouldn't occur
   1212       # outside of strings and chars.
   1213       elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
   1214       elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
   1215       elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
   1216     return elided
   1217 
   1218 
   1219 def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
   1220   """Find the position just after the matching endchar.
   1221 
   1222   Args:
   1223     line: a CleansedLines line.
   1224     startpos: start searching at this position.
   1225     depth: nesting level at startpos.
   1226     startchar: expression opening character.
   1227     endchar: expression closing character.
   1228 
   1229   Returns:
   1230     On finding matching endchar: (index just after matching endchar, 0)
   1231     Otherwise: (-1, new depth at end of this line)
   1232   """
   1233   for i in xrange(startpos, len(line)):
   1234     if line[i] == startchar:
   1235       depth += 1
   1236     elif line[i] == endchar:
   1237       depth -= 1
   1238       if depth == 0:
   1239         return (i + 1, 0)
   1240   return (-1, depth)
   1241 
   1242 
   1243 def CloseExpression(clean_lines, linenum, pos):
   1244   """If input points to ( or { or [ or <, finds the position that closes it.
   1245 
   1246   If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
   1247   linenum/pos that correspond to the closing of the expression.
   1248 
   1249   Args:
   1250     clean_lines: A CleansedLines instance containing the file.
   1251     linenum: The number of the line to check.
   1252     pos: A position on the line.
   1253 
   1254   Returns:
   1255     A tuple (line, linenum, pos) pointer *past* the closing brace, or
   1256     (line, len(lines), -1) if we never find a close.  Note we ignore
   1257     strings and comments when matching; and the line we return is the
   1258     'cleansed' line at linenum.
   1259   """
   1260 
   1261   line = clean_lines.elided[linenum]
   1262   startchar = line[pos]
   1263   if startchar not in '({[<':
   1264     return (line, clean_lines.NumLines(), -1)
   1265   if startchar == '(': endchar = ')'
   1266   if startchar == '[': endchar = ']'
   1267   if startchar == '{': endchar = '}'
   1268   if startchar == '<': endchar = '>'
   1269 
   1270   # Check first line
   1271   (end_pos, num_open) = FindEndOfExpressionInLine(
   1272       line, pos, 0, startchar, endchar)
   1273   if end_pos > -1:
   1274     return (line, linenum, end_pos)
   1275 
   1276   # Continue scanning forward
   1277   while linenum < clean_lines.NumLines() - 1:
   1278     linenum += 1
   1279     line = clean_lines.elided[linenum]
   1280     (end_pos, num_open) = FindEndOfExpressionInLine(
   1281         line, 0, num_open, startchar, endchar)
   1282     if end_pos > -1:
   1283       return (line, linenum, end_pos)
   1284 
   1285   # Did not find endchar before end of file, give up
   1286   return (line, clean_lines.NumLines(), -1)
   1287 
   1288 
   1289 def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar):
   1290   """Find position at the matching startchar.
   1291 
   1292   This is almost the reverse of FindEndOfExpressionInLine, but note
   1293   that the input position and returned position differs by 1.
   1294 
   1295   Args:
   1296     line: a CleansedLines line.
   1297     endpos: start searching at this position.
   1298     depth: nesting level at endpos.
   1299     startchar: expression opening character.
   1300     endchar: expression closing character.
   1301 
   1302   Returns:
   1303     On finding matching startchar: (index at matching startchar, 0)
   1304     Otherwise: (-1, new depth at beginning of this line)
   1305   """
   1306   for i in xrange(endpos, -1, -1):
   1307     if line[i] == endchar:
   1308       depth += 1
   1309     elif line[i] == startchar:
   1310       depth -= 1
   1311       if depth == 0:
   1312         return (i, 0)
   1313   return (-1, depth)
   1314 
   1315 
   1316 def ReverseCloseExpression(clean_lines, linenum, pos):
   1317   """If input points to ) or } or ] or >, finds the position that opens it.
   1318 
   1319   If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
   1320   linenum/pos that correspond to the opening of the expression.
   1321 
   1322   Args:
   1323     clean_lines: A CleansedLines instance containing the file.
   1324     linenum: The number of the line to check.
   1325     pos: A position on the line.
   1326 
   1327   Returns:
   1328     A tuple (line, linenum, pos) pointer *at* the opening brace, or
   1329     (line, 0, -1) if we never find the matching opening brace.  Note
   1330     we ignore strings and comments when matching; and the line we
   1331     return is the 'cleansed' line at linenum.
   1332   """
   1333   line = clean_lines.elided[linenum]
   1334   endchar = line[pos]
   1335   if endchar not in ')}]>':
   1336     return (line, 0, -1)
   1337   if endchar == ')': startchar = '('
   1338   if endchar == ']': startchar = '['
   1339   if endchar == '}': startchar = '{'
   1340   if endchar == '>': startchar = '<'
   1341 
   1342   # Check last line
   1343   (start_pos, num_open) = FindStartOfExpressionInLine(
   1344       line, pos, 0, startchar, endchar)
   1345   if start_pos > -1:
   1346     return (line, linenum, start_pos)
   1347 
   1348   # Continue scanning backward
   1349   while linenum > 0:
   1350     linenum -= 1
   1351     line = clean_lines.elided[linenum]
   1352     (start_pos, num_open) = FindStartOfExpressionInLine(
   1353         line, len(line) - 1, num_open, startchar, endchar)
   1354     if start_pos > -1:
   1355       return (line, linenum, start_pos)
   1356 
   1357   # Did not find startchar before beginning of file, give up
   1358   return (line, 0, -1)
   1359 
   1360 
   1361 def CheckForCopyright(filename, lines, error):
   1362   """Logs an error if no Copyright message appears at the top of the file."""
   1363 
   1364   # We'll say it should occur by line 10. Don't forget there's a
   1365   # dummy line at the front.
   1366   for line in xrange(1, min(len(lines), 11)):
   1367     if re.search(r'Copyright', lines[line], re.I): break
   1368   else:                       # means no copyright line was found
   1369     error(filename, 0, 'legal/copyright', 5,
   1370           'No copyright message found.  '
   1371           'You should have a line: "Copyright [year] <Copyright Owner>"')
   1372 
   1373 
   1374 def GetHeaderGuardCPPVariable(filename):
   1375   """Returns the CPP variable that should be used as a header guard.
   1376 
   1377   Args:
   1378     filename: The name of a C++ header file.
   1379 
   1380   Returns:
   1381     The CPP variable that should be used as a header guard in the
   1382     named file.
   1383 
   1384   """
   1385 
   1386   # Restores original filename in case that cpplint is invoked from Emacs's
   1387   # flymake.
   1388   filename = re.sub(r'_flymake\.h$', '.h', filename)
   1389   filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
   1390 
   1391   fileinfo = FileInfo(filename)
   1392   file_path_from_root = fileinfo.RepositoryName()
   1393   if _root:
   1394     file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
   1395   return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
   1396 
   1397 
   1398 def CheckForHeaderGuard(filename, lines, error):
   1399   """Checks that the file contains a header guard.
   1400 
   1401   Logs an error if no #ifndef header guard is present.  For other
   1402   headers, checks that the full pathname is used.
   1403 
   1404   Args:
   1405     filename: The name of the C++ header file.
   1406     lines: An array of strings, each representing a line of the file.
   1407     error: The function to call with any errors found.
   1408   """
   1409 
   1410   cppvar = GetHeaderGuardCPPVariable(filename)
   1411 
   1412   ifndef = None
   1413   ifndef_linenum = 0
   1414   define = None
   1415   endif = None
   1416   endif_linenum = 0
   1417   for linenum, line in enumerate(lines):
   1418     linesplit = line.split()
   1419     if len(linesplit) >= 2:
   1420       # find the first occurrence of #ifndef and #define, save arg
   1421       if not ifndef and linesplit[0] == '#ifndef':
   1422         # set ifndef to the header guard presented on the #ifndef line.
   1423         ifndef = linesplit[1]
   1424         ifndef_linenum = linenum
   1425       if not define and linesplit[0] == '#define':
   1426         define = linesplit[1]
   1427     # find the last occurrence of #endif, save entire line
   1428     if line.startswith('#endif'):
   1429       endif = line
   1430       endif_linenum = linenum
   1431 
   1432   if not ifndef:
   1433     error(filename, 0, 'build/header_guard', 5,
   1434           'No #ifndef header guard found, suggested CPP variable is: %s' %
   1435           cppvar)
   1436     return
   1437 
   1438   if not define:
   1439     error(filename, 0, 'build/header_guard', 5,
   1440           'No #define header guard found, suggested CPP variable is: %s' %
   1441           cppvar)
   1442     return
   1443 
   1444   # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
   1445   # for backward compatibility.
   1446   if ifndef != cppvar:
   1447     error_level = 0
   1448     if ifndef != cppvar + '_':
   1449       error_level = 5
   1450 
   1451     ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
   1452                             error)
   1453     error(filename, ifndef_linenum, 'build/header_guard', error_level,
   1454           '#ifndef header guard has wrong style, please use: %s' % cppvar)
   1455 
   1456   if define != ifndef:
   1457     error(filename, 0, 'build/header_guard', 5,
   1458           '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
   1459           cppvar)
   1460     return
   1461 
   1462   if endif != ('#endif  // %s' % cppvar):
   1463     error_level = 0
   1464     if endif != ('#endif  // %s' % (cppvar + '_')):
   1465       error_level = 5
   1466 
   1467     ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
   1468                             error)
   1469     error(filename, endif_linenum, 'build/header_guard', error_level,
   1470           '#endif line should be "#endif  // %s"' % cppvar)
   1471 
   1472 
   1473 def CheckForBadCharacters(filename, lines, error):
   1474   """Logs an error for each line containing bad characters.
   1475 
   1476   Two kinds of bad characters:
   1477 
   1478   1. Unicode replacement characters: These indicate that either the file
   1479   contained invalid UTF-8 (likely) or Unicode replacement characters (which
   1480   it shouldn't).  Note that it's possible for this to throw off line
   1481   numbering if the invalid UTF-8 occurred adjacent to a newline.
   1482 
   1483   2. NUL bytes.  These are problematic for some tools.
   1484 
   1485   Args:
   1486     filename: The name of the current file.
   1487     lines: An array of strings, each representing a line of the file.
   1488     error: The function to call with any errors found.
   1489   """
   1490   for linenum, line in enumerate(lines):
   1491     if u'\ufffd' in line:
   1492       error(filename, linenum, 'readability/utf8', 5,
   1493             'Line contains invalid UTF-8 (or Unicode replacement character).')
   1494     if '\0' in line:
   1495       error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
   1496 
   1497 
   1498 def CheckForNewlineAtEOF(filename, lines, error):
   1499   """Logs an error if there is no newline char at the end of the file.
   1500 
   1501   Args:
   1502     filename: The name of the current file.
   1503     lines: An array of strings, each representing a line of the file.
   1504     error: The function to call with any errors found.
   1505   """
   1506 
   1507   # The array lines() was created by adding two newlines to the
   1508   # original file (go figure), then splitting on \n.
   1509   # To verify that the file ends in \n, we just have to make sure the
   1510   # last-but-two element of lines() exists and is empty.
   1511   if len(lines) < 3 or lines[-2]:
   1512     error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
   1513           'Could not find a newline character at the end of the file.')
   1514 
   1515 
   1516 def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
   1517   """Logs an error if we see /* ... */ or "..." that extend past one line.
   1518 
   1519   /* ... */ comments are legit inside macros, for one line.
   1520   Otherwise, we prefer // comments, so it's ok to warn about the
   1521   other.  Likewise, it's ok for strings to extend across multiple
   1522   lines, as long as a line continuation character (backslash)
   1523   terminates each line. Although not currently prohibited by the C++
   1524   style guide, it's ugly and unnecessary. We don't do well with either
   1525   in this lint program, so we warn about both.
   1526 
   1527   Args:
   1528     filename: The name of the current file.
   1529     clean_lines: A CleansedLines instance containing the file.
   1530     linenum: The number of the line to check.
   1531     error: The function to call with any errors found.
   1532   """
   1533   line = clean_lines.elided[linenum]
   1534 
   1535   # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1536   # second (escaped) slash may trigger later \" detection erroneously.
   1537   line = line.replace('\\\\', '')
   1538 
   1539   if line.count('/*') > line.count('*/'):
   1540     error(filename, linenum, 'readability/multiline_comment', 5,
   1541           'Complex multi-line /*...*/-style comment found. '
   1542           'Lint may give bogus warnings.  '
   1543           'Consider replacing these with //-style comments, '
   1544           'with #if 0...#endif, '
   1545           'or with more clearly structured multi-line comments.')
   1546 
   1547   if (line.count('"') - line.count('\\"')) % 2:
   1548     error(filename, linenum, 'readability/multiline_string', 5,
   1549           'Multi-line string ("...") found.  This lint script doesn\'t '
   1550           'do well with such strings, and may give bogus warnings.  '
   1551           'Use C++11 raw strings or concatenation instead.')
   1552 
   1553 
   1554 threading_list = (
   1555     ('asctime(', 'asctime_r('),
   1556     ('ctime(', 'ctime_r('),
   1557     ('getgrgid(', 'getgrgid_r('),
   1558     ('getgrnam(', 'getgrnam_r('),
   1559     ('getlogin(', 'getlogin_r('),
   1560     ('getpwnam(', 'getpwnam_r('),
   1561     ('getpwuid(', 'getpwuid_r('),
   1562     ('gmtime(', 'gmtime_r('),
   1563     ('localtime(', 'localtime_r('),
   1564     ('rand(', 'rand_r('),
   1565     ('strtok(', 'strtok_r('),
   1566     ('ttyname(', 'ttyname_r('),
   1567     )
   1568 
   1569 
   1570 def CheckPosixThreading(filename, clean_lines, linenum, error):
   1571   """Checks for calls to thread-unsafe functions.
   1572 
   1573   Much code has been originally written without consideration of
   1574   multi-threading. Also, engineers are relying on their old experience;
   1575   they have learned posix before threading extensions were added. These
   1576   tests guide the engineers to use thread-safe functions (when using
   1577   posix directly).
   1578 
   1579   Args:
   1580     filename: The name of the current file.
   1581     clean_lines: A CleansedLines instance containing the file.
   1582     linenum: The number of the line to check.
   1583     error: The function to call with any errors found.
   1584   """
   1585   line = clean_lines.elided[linenum]
   1586   for single_thread_function, multithread_safe_function in threading_list:
   1587     ix = line.find(single_thread_function)
   1588     # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
   1589     if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
   1590                                 line[ix - 1] not in ('_', '.', '>'))):
   1591       error(filename, linenum, 'runtime/threadsafe_fn', 2,
   1592             'Consider using ' + multithread_safe_function +
   1593             '...) instead of ' + single_thread_function +
   1594             '...) for improved thread safety.')
   1595 
   1596 
   1597 def CheckVlogArguments(filename, clean_lines, linenum, error):
   1598   """Checks that VLOG() is only used for defining a logging level.
   1599 
   1600   For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
   1601   VLOG(FATAL) are not.
   1602 
   1603   Args:
   1604     filename: The name of the current file.
   1605     clean_lines: A CleansedLines instance containing the file.
   1606     linenum: The number of the line to check.
   1607     error: The function to call with any errors found.
   1608   """
   1609   line = clean_lines.elided[linenum]
   1610   if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
   1611     error(filename, linenum, 'runtime/vlog', 5,
   1612           'VLOG() should be used with numeric verbosity level.  '
   1613           'Use LOG() if you want symbolic severity levels.')
   1614 
   1615 
   1616 # Matches invalid increment: *count++, which moves pointer instead of
   1617 # incrementing a value.
   1618 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   1619     r'^\s*\*\w+(\+\+|--);')
   1620 
   1621 
   1622 def CheckInvalidIncrement(filename, clean_lines, linenum, error):
   1623   """Checks for invalid increment *count++.
   1624 
   1625   For example following function:
   1626   void increment_counter(int* count) {
   1627     *count++;
   1628   }
   1629   is invalid, because it effectively does count++, moving pointer, and should
   1630   be replaced with ++*count, (*count)++ or *count += 1.
   1631 
   1632   Args:
   1633     filename: The name of the current file.
   1634     clean_lines: A CleansedLines instance containing the file.
   1635     linenum: The number of the line to check.
   1636     error: The function to call with any errors found.
   1637   """
   1638   line = clean_lines.elided[linenum]
   1639   if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1640     error(filename, linenum, 'runtime/invalid_increment', 5,
   1641           'Changing pointer instead of value (or unused value of operator*).')
   1642 
   1643 
   1644 class _BlockInfo(object):
   1645   """Stores information about a generic block of code."""
   1646 
   1647   def __init__(self, seen_open_brace):
   1648     self.seen_open_brace = seen_open_brace
   1649     self.open_parentheses = 0
   1650     self.inline_asm = _NO_ASM
   1651 
   1652   def CheckBegin(self, filename, clean_lines, linenum, error):
   1653     """Run checks that applies to text up to the opening brace.
   1654 
   1655     This is mostly for checking the text after the class identifier
   1656     and the "{", usually where the base class is specified.  For other
   1657     blocks, there isn't much to check, so we always pass.
   1658 
   1659     Args:
   1660       filename: The name of the current file.
   1661       clean_lines: A CleansedLines instance containing the file.
   1662       linenum: The number of the line to check.
   1663       error: The function to call with any errors found.
   1664     """
   1665     pass
   1666 
   1667   def CheckEnd(self, filename, clean_lines, linenum, error):
   1668     """Run checks that applies to text after the closing brace.
   1669 
   1670     This is mostly used for checking end of namespace comments.
   1671 
   1672     Args:
   1673       filename: The name of the current file.
   1674       clean_lines: A CleansedLines instance containing the file.
   1675       linenum: The number of the line to check.
   1676       error: The function to call with any errors found.
   1677     """
   1678     pass
   1679 
   1680 
   1681 class _ClassInfo(_BlockInfo):
   1682   """Stores information about a class."""
   1683 
   1684   def __init__(self, name, class_or_struct, clean_lines, linenum):
   1685     _BlockInfo.__init__(self, False)
   1686     self.name = name
   1687     self.starting_linenum = linenum
   1688     self.is_derived = False
   1689     if class_or_struct == 'struct':
   1690       self.access = 'public'
   1691       self.is_struct = True
   1692     else:
   1693       self.access = 'private'
   1694       self.is_struct = False
   1695 
   1696     # Remember initial indentation level for this class.  Using raw_lines here
   1697     # instead of elided to account for leading comments.
   1698     initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum])
   1699     if initial_indent:
   1700       self.class_indent = len(initial_indent.group(1))
   1701     else:
   1702       self.class_indent = 0
   1703 
   1704     # Try to find the end of the class.  This will be confused by things like:
   1705     #   class A {
   1706     #   } *x = { ...
   1707     #
   1708     # But it's still good enough for CheckSectionSpacing.
   1709     self.last_line = 0
   1710     depth = 0
   1711     for i in range(linenum, clean_lines.NumLines()):
   1712       line = clean_lines.elided[i]
   1713       depth += line.count('{') - line.count('}')
   1714       if not depth:
   1715         self.last_line = i
   1716         break
   1717 
   1718   def CheckBegin(self, filename, clean_lines, linenum, error):
   1719     # Look for a bare ':'
   1720     if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
   1721       self.is_derived = True
   1722 
   1723   def CheckEnd(self, filename, clean_lines, linenum, error):
   1724     # Check that closing brace is aligned with beginning of the class.
   1725     # Only do this if the closing brace is indented by only whitespaces.
   1726     # This means we will not check single-line class definitions.
   1727     indent = Match(r'^( *)\}', clean_lines.elided[linenum])
   1728     if indent and len(indent.group(1)) != self.class_indent:
   1729       if self.is_struct:
   1730         parent = 'struct ' + self.name
   1731       else:
   1732         parent = 'class ' + self.name
   1733       error(filename, linenum, 'whitespace/indent', 3,
   1734             'Closing brace should be aligned with beginning of %s' % parent)
   1735 
   1736 
   1737 class _NamespaceInfo(_BlockInfo):
   1738   """Stores information about a namespace."""
   1739 
   1740   def __init__(self, name, linenum):
   1741     _BlockInfo.__init__(self, False)
   1742     self.name = name or ''
   1743     self.starting_linenum = linenum
   1744 
   1745   def CheckEnd(self, filename, clean_lines, linenum, error):
   1746     """Check end of namespace comments."""
   1747     line = clean_lines.raw_lines[linenum]
   1748 
   1749     # Check how many lines is enclosed in this namespace.  Don't issue
   1750     # warning for missing namespace comments if there aren't enough
   1751     # lines.  However, do apply checks if there is already an end of
   1752     # namespace comment and it's incorrect.
   1753     #
   1754     # TODO(unknown): We always want to check end of namespace comments
   1755     # if a namespace is large, but sometimes we also want to apply the
   1756     # check if a short namespace contained nontrivial things (something
   1757     # other than forward declarations).  There is currently no logic on
   1758     # deciding what these nontrivial things are, so this check is
   1759     # triggered by namespace size only, which works most of the time.
   1760     if (linenum - self.starting_linenum < 10
   1761         and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
   1762       return
   1763 
   1764     # Look for matching comment at end of namespace.
   1765     #
   1766     # Note that we accept C style "/* */" comments for terminating
   1767     # namespaces, so that code that terminate namespaces inside
   1768     # preprocessor macros can be cpplint clean.
   1769     #
   1770     # We also accept stuff like "// end of namespace <name>." with the
   1771     # period at the end.
   1772     #
   1773     # Besides these, we don't accept anything else, otherwise we might
   1774     # get false negatives when existing comment is a substring of the
   1775     # expected namespace.
   1776     if self.name:
   1777       # Named namespace
   1778       if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
   1779                     r'[\*/\.\\\s]*$'),
   1780                    line):
   1781         error(filename, linenum, 'readability/namespace', 5,
   1782               'Namespace should be terminated with "// namespace %s"' %
   1783               self.name)
   1784     else:
   1785       # Anonymous namespace
   1786       if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
   1787         error(filename, linenum, 'readability/namespace', 5,
   1788               'Namespace should be terminated with "// namespace"')
   1789 
   1790 
   1791 class _PreprocessorInfo(object):
   1792   """Stores checkpoints of nesting stacks when #if/#else is seen."""
   1793 
   1794   def __init__(self, stack_before_if):
   1795     # The entire nesting stack before #if
   1796     self.stack_before_if = stack_before_if
   1797 
   1798     # The entire nesting stack up to #else
   1799     self.stack_before_else = []
   1800 
   1801     # Whether we have already seen #else or #elif
   1802     self.seen_else = False
   1803 
   1804 
   1805 class _NestingState(object):
   1806   """Holds states related to parsing braces."""
   1807 
   1808   def __init__(self):
   1809     # Stack for tracking all braces.  An object is pushed whenever we
   1810     # see a "{", and popped when we see a "}".  Only 3 types of
   1811     # objects are possible:
   1812     # - _ClassInfo: a class or struct.
   1813     # - _NamespaceInfo: a namespace.
   1814     # - _BlockInfo: some other type of block.
   1815     self.stack = []
   1816 
   1817     # Stack of _PreprocessorInfo objects.
   1818     self.pp_stack = []
   1819 
   1820   def SeenOpenBrace(self):
   1821     """Check if we have seen the opening brace for the innermost block.
   1822 
   1823     Returns:
   1824       True if we have seen the opening brace, False if the innermost
   1825       block is still expecting an opening brace.
   1826     """
   1827     return (not self.stack) or self.stack[-1].seen_open_brace
   1828 
   1829   def InNamespaceBody(self):
   1830     """Check if we are currently one level inside a namespace body.
   1831 
   1832     Returns:
   1833       True if top of the stack is a namespace block, False otherwise.
   1834     """
   1835     return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
   1836 
   1837   def UpdatePreprocessor(self, line):
   1838     """Update preprocessor stack.
   1839 
   1840     We need to handle preprocessors due to classes like this:
   1841       #ifdef SWIG
   1842       struct ResultDetailsPageElementExtensionPoint {
   1843       #else
   1844       struct ResultDetailsPageElementExtensionPoint : public Extension {
   1845       #endif
   1846 
   1847     We make the following assumptions (good enough for most files):
   1848     - Preprocessor condition evaluates to true from #if up to first
   1849       #else/#elif/#endif.
   1850 
   1851     - Preprocessor condition evaluates to false from #else/#elif up
   1852       to #endif.  We still perform lint checks on these lines, but
   1853       these do not affect nesting stack.
   1854 
   1855     Args:
   1856       line: current line to check.
   1857     """
   1858     if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
   1859       # Beginning of #if block, save the nesting stack here.  The saved
   1860       # stack will allow us to restore the parsing state in the #else case.
   1861       self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
   1862     elif Match(r'^\s*#\s*(else|elif)\b', line):
   1863       # Beginning of #else block
   1864       if self.pp_stack:
   1865         if not self.pp_stack[-1].seen_else:
   1866           # This is the first #else or #elif block.  Remember the
   1867           # whole nesting stack up to this point.  This is what we
   1868           # keep after the #endif.
   1869           self.pp_stack[-1].seen_else = True
   1870           self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
   1871 
   1872         # Restore the stack to how it was before the #if
   1873         self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
   1874       else:
   1875         # TODO(unknown): unexpected #else, issue warning?
   1876         pass
   1877     elif Match(r'^\s*#\s*endif\b', line):
   1878       # End of #if or #else blocks.
   1879       if self.pp_stack:
   1880         # If we saw an #else, we will need to restore the nesting
   1881         # stack to its former state before the #else, otherwise we
   1882         # will just continue from where we left off.
   1883         if self.pp_stack[-1].seen_else:
   1884           # Here we can just use a shallow copy since we are the last
   1885           # reference to it.
   1886           self.stack = self.pp_stack[-1].stack_before_else
   1887         # Drop the corresponding #if
   1888         self.pp_stack.pop()
   1889       else:
   1890         # TODO(unknown): unexpected #endif, issue warning?
   1891         pass
   1892 
   1893   def Update(self, filename, clean_lines, linenum, error):
   1894     """Update nesting state with current line.
   1895 
   1896     Args:
   1897       filename: The name of the current file.
   1898       clean_lines: A CleansedLines instance containing the file.
   1899       linenum: The number of the line to check.
   1900       error: The function to call with any errors found.
   1901     """
   1902     line = clean_lines.elided[linenum]
   1903 
   1904     # Update pp_stack first
   1905     self.UpdatePreprocessor(line)
   1906 
   1907     # Count parentheses.  This is to avoid adding struct arguments to
   1908     # the nesting stack.
   1909     if self.stack:
   1910       inner_block = self.stack[-1]
   1911       depth_change = line.count('(') - line.count(')')
   1912       inner_block.open_parentheses += depth_change
   1913 
   1914       # Also check if we are starting or ending an inline assembly block.
   1915       if inner_block.inline_asm in (_NO_ASM, _END_ASM):
   1916         if (depth_change != 0 and
   1917             inner_block.open_parentheses == 1 and
   1918             _MATCH_ASM.match(line)):
   1919           # Enter assembly block
   1920           inner_block.inline_asm = _INSIDE_ASM
   1921         else:
   1922           # Not entering assembly block.  If previous line was _END_ASM,
   1923           # we will now shift to _NO_ASM state.
   1924           inner_block.inline_asm = _NO_ASM
   1925       elif (inner_block.inline_asm == _INSIDE_ASM and
   1926             inner_block.open_parentheses == 0):
   1927         # Exit assembly block
   1928         inner_block.inline_asm = _END_ASM
   1929 
   1930     # Consume namespace declaration at the beginning of the line.  Do
   1931     # this in a loop so that we catch same line declarations like this:
   1932     #   namespace proto2 { namespace bridge { class MessageSet; } }
   1933     while True:
   1934       # Match start of namespace.  The "\b\s*" below catches namespace
   1935       # declarations even if it weren't followed by a whitespace, this
   1936       # is so that we don't confuse our namespace checker.  The
   1937       # missing spaces will be flagged by CheckSpacing.
   1938       namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
   1939       if not namespace_decl_match:
   1940         break
   1941 
   1942       new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
   1943       self.stack.append(new_namespace)
   1944 
   1945       line = namespace_decl_match.group(2)
   1946       if line.find('{') != -1:
   1947         new_namespace.seen_open_brace = True
   1948         line = line[line.find('{') + 1:]
   1949 
   1950     # Look for a class declaration in whatever is left of the line
   1951     # after parsing namespaces.  The regexp accounts for decorated classes
   1952     # such as in:
   1953     #   class LOCKABLE API Object {
   1954     #   };
   1955     #
   1956     # Templates with class arguments may confuse the parser, for example:
   1957     #   template <class T
   1958     #             class Comparator = less<T>,
   1959     #             class Vector = vector<T> >
   1960     #   class HeapQueue {
   1961     #
   1962     # Because this parser has no nesting state about templates, by the
   1963     # time it saw "class Comparator", it may think that it's a new class.
   1964     # Nested templates have a similar problem:
   1965     #   template <
   1966     #       typename ExportedType,
   1967     #       typename TupleType,
   1968     #       template <typename, typename> class ImplTemplate>
   1969     #
   1970     # To avoid these cases, we ignore classes that are followed by '=' or '>'
   1971     class_decl_match = Match(
   1972         r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
   1973         r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
   1974         r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line)
   1975     if (class_decl_match and
   1976         (not self.stack or self.stack[-1].open_parentheses == 0)):
   1977       self.stack.append(_ClassInfo(
   1978           class_decl_match.group(4), class_decl_match.group(2),
   1979           clean_lines, linenum))
   1980       line = class_decl_match.group(5)
   1981 
   1982     # If we have not yet seen the opening brace for the innermost block,
   1983     # run checks here.
   1984     if not self.SeenOpenBrace():
   1985       self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
   1986 
   1987     # Update access control if we are inside a class/struct
   1988     if self.stack and isinstance(self.stack[-1], _ClassInfo):
   1989       classinfo = self.stack[-1]
   1990       access_match = Match(
   1991           r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
   1992           r':(?:[^:]|$)',
   1993           line)
   1994       if access_match:
   1995         classinfo.access = access_match.group(2)
   1996 
   1997         # Check that access keywords are indented +1 space.  Skip this
   1998         # check if the keywords are not preceded by whitespaces.
   1999         indent = access_match.group(1)
   2000         if (len(indent) != classinfo.class_indent + 1 and
   2001             Match(r'^\s*$', indent)):
   2002           if classinfo.is_struct:
   2003             parent = 'struct ' + classinfo.name
   2004           else:
   2005             parent = 'class ' + classinfo.name
   2006           slots = ''
   2007           if access_match.group(3):
   2008             slots = access_match.group(3)
   2009           error(filename, linenum, 'whitespace/indent', 3,
   2010                 '%s%s: should be indented +1 space inside %s' % (
   2011                     access_match.group(2), slots, parent))
   2012 
   2013     # Consume braces or semicolons from what's left of the line
   2014     while True:
   2015       # Match first brace, semicolon, or closed parenthesis.
   2016       matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
   2017       if not matched:
   2018         break
   2019 
   2020       token = matched.group(1)
   2021       if token == '{':
   2022         # If namespace or class hasn't seen a opening brace yet, mark
   2023         # namespace/class head as complete.  Push a new block onto the
   2024         # stack otherwise.
   2025         if not self.SeenOpenBrace():
   2026           self.stack[-1].seen_open_brace = True
   2027         else:
   2028           self.stack.append(_BlockInfo(True))
   2029           if _MATCH_ASM.match(line):
   2030             self.stack[-1].inline_asm = _BLOCK_ASM
   2031       elif token == ';' or token == ')':
   2032         # If we haven't seen an opening brace yet, but we already saw
   2033         # a semicolon, this is probably a forward declaration.  Pop
   2034         # the stack for these.
   2035         #
   2036         # Similarly, if we haven't seen an opening brace yet, but we
   2037         # already saw a closing parenthesis, then these are probably
   2038         # function arguments with extra "class" or "struct" keywords.
   2039         # Also pop these stack for these.
   2040         if not self.SeenOpenBrace():
   2041           self.stack.pop()
   2042       else:  # token == '}'
   2043         # Perform end of block checks and pop the stack.
   2044         if self.stack:
   2045           self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
   2046           self.stack.pop()
   2047       line = matched.group(2)
   2048 
   2049   def InnermostClass(self):
   2050     """Get class info on the top of the stack.
   2051 
   2052     Returns:
   2053       A _ClassInfo object if we are inside a class, or None otherwise.
   2054     """
   2055     for i in range(len(self.stack), 0, -1):
   2056       classinfo = self.stack[i - 1]
   2057       if isinstance(classinfo, _ClassInfo):
   2058         return classinfo
   2059     return None
   2060 
   2061   def CheckCompletedBlocks(self, filename, error):
   2062     """Checks that all classes and namespaces have been completely parsed.
   2063 
   2064     Call this when all lines in a file have been processed.
   2065     Args:
   2066       filename: The name of the current file.
   2067       error: The function to call with any errors found.
   2068     """
   2069     # Note: This test can result in false positives if #ifdef constructs
   2070     # get in the way of brace matching. See the testBuildClass test in
   2071     # cpplint_unittest.py for an example of this.
   2072     for obj in self.stack:
   2073       if isinstance(obj, _ClassInfo):
   2074         error(filename, obj.starting_linenum, 'build/class', 5,
   2075               'Failed to find complete declaration of class %s' %
   2076               obj.name)
   2077       elif isinstance(obj, _NamespaceInfo):
   2078         error(filename, obj.starting_linenum, 'build/namespaces', 5,
   2079               'Failed to find complete declaration of namespace %s' %
   2080               obj.name)
   2081 
   2082 
   2083 def CheckForNonStandardConstructs(filename, clean_lines, linenum,
   2084                                   nesting_state, error):
   2085   r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   2086 
   2087   Complain about several constructs which gcc-2 accepts, but which are
   2088   not standard C++.  Warning about these in lint is one way to ease the
   2089   transition to new compilers.
   2090   - put storage class first (e.g. "static const" instead of "const static").
   2091   - "%lld" instead of %qd" in printf-type functions.
   2092   - "%1$d" is non-standard in printf-type functions.
   2093   - "\%" is an undefined character escape sequence.
   2094   - text after #endif is not allowed.
   2095   - invalid inner-style forward declaration.
   2096   - >? and <? operators, and their >?= and <?= cousins.
   2097 
   2098   Additionally, check for constructor/destructor style violations and reference
   2099   members, as it is very convenient to do so while checking for
   2100   gcc-2 compliance.
   2101 
   2102   Args:
   2103     filename: The name of the current file.
   2104     clean_lines: A CleansedLines instance containing the file.
   2105     linenum: The number of the line to check.
   2106     nesting_state: A _NestingState instance which maintains information about
   2107                    the current stack of nested blocks being parsed.
   2108     error: A callable to which errors are reported, which takes 4 arguments:
   2109            filename, line number, error level, and message
   2110   """
   2111 
   2112   # Remove comments from the line, but leave in strings for now.
   2113   line = clean_lines.lines[linenum]
   2114 
   2115   if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   2116     error(filename, linenum, 'runtime/printf_format', 3,
   2117           '%q in format strings is deprecated.  Use %ll instead.')
   2118 
   2119   if Search(r'printf\s*\(.*".*%\d+\$', line):
   2120     error(filename, linenum, 'runtime/printf_format', 2,
   2121           '%N$ formats are unconventional.  Try rewriting to avoid them.')
   2122 
   2123   # Remove escaped backslashes before looking for undefined escapes.
   2124   line = line.replace('\\\\', '')
   2125 
   2126   if Search(r'("|\').*\\(%|\[|\(|{)', line):
   2127     error(filename, linenum, 'build/printf_format', 3,
   2128           '%, [, (, and { are undefined character escapes.  Unescape them.')
   2129 
   2130   # For the rest, work with both comments and strings removed.
   2131   line = clean_lines.elided[linenum]
   2132 
   2133   if Search(r'\b(const|volatile|void|char|short|int|long'
   2134             r'|float|double|signed|unsigned'
   2135             r'|schar|u?int8|u?int16|u?int32|u?int64)'
   2136             r'\s+(register|static|extern|typedef)\b',
   2137             line):
   2138     error(filename, linenum, 'build/storage_class', 5,
   2139           'Storage class (static, extern, typedef, etc) should be first.')
   2140 
   2141   if Match(r'\s*#\s*endif\s*[^/\s]+', line):
   2142     error(filename, linenum, 'build/endif_comment', 5,
   2143           'Uncommented text after #endif is non-standard.  Use a comment.')
   2144 
   2145   if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   2146     error(filename, linenum, 'build/forward_decl', 5,
   2147           'Inner-style forward declarations are invalid.  Remove this line.')
   2148 
   2149   if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
   2150             line):
   2151     error(filename, linenum, 'build/deprecated', 3,
   2152           '>? and <? (max and min) operators are non-standard and deprecated.')
   2153 
   2154   if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
   2155     # TODO(unknown): Could it be expanded safely to arbitrary references,
   2156     # without triggering too many false positives? The first
   2157     # attempt triggered 5 warnings for mostly benign code in the regtest, hence
   2158     # the restriction.
   2159     # Here's the original regexp, for the reference:
   2160     # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
   2161     # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
   2162     error(filename, linenum, 'runtime/member_string_references', 2,
   2163           'const string& members are dangerous. It is much better to use '
   2164           'alternatives, such as pointers or simple constants.')
   2165 
   2166   # Everything else in this function operates on class declarations.
   2167   # Return early if the top of the nesting stack is not a class, or if
   2168   # the class head is not completed yet.
   2169   classinfo = nesting_state.InnermostClass()
   2170   if not classinfo or not classinfo.seen_open_brace:
   2171     return
   2172 
   2173   # The class may have been declared with namespace or classname qualifiers.
   2174   # The constructor and destructor will not have those qualifiers.
   2175   base_classname = classinfo.name.split('::')[-1]
   2176 
   2177   # Look for single-argument constructors that aren't marked explicit.
   2178   # Technically a valid construct, but against style.
   2179   args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
   2180                % re.escape(base_classname),
   2181                line)
   2182   if (args and
   2183       args.group(1) != 'void' and
   2184       not Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&'
   2185                 % re.escape(base_classname), args.group(1).strip())):
   2186     error(filename, linenum, 'runtime/explicit', 5,
   2187           'Single-argument constructors should be marked explicit.')
   2188 
   2189 
   2190 def CheckSpacingForFunctionCall(filename, line, linenum, error):
   2191   """Checks for the correctness of various spacing around function calls.
   2192 
   2193   Args:
   2194     filename: The name of the current file.
   2195     line: The text of the line to check.
   2196     linenum: The number of the line to check.
   2197     error: The function to call with any errors found.
   2198   """
   2199 
   2200   # Since function calls often occur inside if/for/while/switch
   2201   # expressions - which have their own, more liberal conventions - we
   2202   # first see if we should be looking inside such an expression for a
   2203   # function call, to which we can apply more strict standards.
   2204   fncall = line    # if there's no control flow construct, look at whole line
   2205   for pattern in (r'\bif\s*\((.*)\)\s*{',
   2206                   r'\bfor\s*\((.*)\)\s*{',
   2207                   r'\bwhile\s*\((.*)\)\s*[{;]',
   2208                   r'\bswitch\s*\((.*)\)\s*{'):
   2209     match = Search(pattern, line)
   2210     if match:
   2211       fncall = match.group(1)    # look inside the parens for function calls
   2212       break
   2213 
   2214   # Except in if/for/while/switch, there should never be space
   2215   # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   2216   # for nested parens ( (a+b) + c ).  Likewise, there should never be
   2217   # a space before a ( when it's a function argument.  I assume it's a
   2218   # function argument when the char before the whitespace is legal in
   2219   # a function name (alnum + _) and we're not starting a macro. Also ignore
   2220   # pointers and references to arrays and functions coz they're too tricky:
   2221   # we use a very simple way to recognize these:
   2222   # " (something)(maybe-something)" or
   2223   # " (something)(maybe-something," or
   2224   # " (something)[something]"
   2225   # Note that we assume the contents of [] to be short enough that
   2226   # they'll never need to wrap.
   2227   if (  # Ignore control structures.
   2228       not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
   2229                  fncall) and
   2230       # Ignore pointers/references to functions.
   2231       not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
   2232       # Ignore pointers/references to arrays.
   2233       not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
   2234     if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
   2235       error(filename, linenum, 'whitespace/parens', 4,
   2236             'Extra space after ( in function call')
   2237     elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
   2238       error(filename, linenum, 'whitespace/parens', 2,
   2239             'Extra space after (')
   2240     if (Search(r'\w\s+\(', fncall) and
   2241         not Search(r'#\s*define|typedef', fncall) and
   2242         not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)):
   2243       error(filename, linenum, 'whitespace/parens', 4,
   2244             'Extra space before ( in function call')
   2245     # If the ) is followed only by a newline or a { + newline, assume it's
   2246     # part of a control statement (if/while/etc), and don't complain
   2247     if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
   2248       # If the closing parenthesis is preceded by only whitespaces,
   2249       # try to give a more descriptive error message.
   2250       if Search(r'^\s+\)', fncall):
   2251         error(filename, linenum, 'whitespace/parens', 2,
   2252               'Closing ) should be moved to the previous line')
   2253       else:
   2254         error(filename, linenum, 'whitespace/parens', 2,
   2255               'Extra space before )')
   2256 
   2257 
   2258 def IsBlankLine(line):
   2259   """Returns true if the given line is blank.
   2260 
   2261   We consider a line to be blank if the line is empty or consists of
   2262   only white spaces.
   2263 
   2264   Args:
   2265     line: A line of a string.
   2266 
   2267   Returns:
   2268     True, if the given line is blank.
   2269   """
   2270   return not line or line.isspace()
   2271 
   2272 
   2273 def CheckForFunctionLengths(filename, clean_lines, linenum,
   2274                             function_state, error):
   2275   """Reports for long function bodies.
   2276 
   2277   For an overview why this is done, see:
   2278   http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   2279 
   2280   Uses a simplistic algorithm assuming other style guidelines
   2281   (especially spacing) are followed.
   2282   Only checks unindented functions, so class members are unchecked.
   2283   Trivial bodies are unchecked, so constructors with huge initializer lists
   2284   may be missed.
   2285   Blank/comment lines are not counted so as to avoid encouraging the removal
   2286   of vertical space and comments just to get through a lint check.
   2287   NOLINT *on the last line of a function* disables this check.
   2288 
   2289   Args:
   2290     filename: The name of the current file.
   2291     clean_lines: A CleansedLines instance containing the file.
   2292     linenum: The number of the line to check.
   2293     function_state: Current function name and lines in body so far.
   2294     error: The function to call with any errors found.
   2295   """
   2296   lines = clean_lines.lines
   2297   line = lines[linenum]
   2298   raw = clean_lines.raw_lines
   2299   raw_line = raw[linenum]
   2300   joined_line = ''
   2301 
   2302   starting_func = False
   2303   regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
   2304   match_result = Match(regexp, line)
   2305   if match_result:
   2306     # If the name is all caps and underscores, figure it's a macro and
   2307     # ignore it, unless it's TEST or TEST_F.
   2308     function_name = match_result.group(1).split()[-1]
   2309     if function_name == 'TEST' or function_name == 'TEST_F' or (
   2310         not Match(r'[A-Z_]+$', function_name)):
   2311       starting_func = True
   2312 
   2313   if starting_func:
   2314     body_found = False
   2315     for start_linenum in xrange(linenum, clean_lines.NumLines()):
   2316       start_line = lines[start_linenum]
   2317       joined_line += ' ' + start_line.lstrip()
   2318       if Search(r'(;|})', start_line):  # Declarations and trivial functions
   2319         body_found = True
   2320         break                              # ... ignore
   2321       elif Search(r'{', start_line):
   2322         body_found = True
   2323         function = Search(r'((\w|:)*)\(', line).group(1)
   2324         if Match(r'TEST', function):    # Handle TEST... macros
   2325           parameter_regexp = Search(r'(\(.*\))', joined_line)
   2326           if parameter_regexp:             # Ignore bad syntax
   2327             function += parameter_regexp.group(1)
   2328         else:
   2329           function += '()'
   2330         function_state.Begin(function)
   2331         break
   2332     if not body_found:
   2333       # No body for the function (or evidence of a non-function) was found.
   2334       error(filename, linenum, 'readability/fn_size', 5,
   2335             'Lint failed to find start of function body.')
   2336   elif Match(r'^\}\s*$', line):  # function end
   2337     function_state.Check(error, filename, linenum)
   2338     function_state.End()
   2339   elif not Match(r'^\s*$', line):
   2340     function_state.Count()  # Count non-blank/non-comment lines.
   2341 
   2342 
   2343 _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
   2344 
   2345 
   2346 def CheckComment(comment, filename, linenum, error):
   2347   """Checks for common mistakes in TODO comments.
   2348 
   2349   Args:
   2350     comment: The text of the comment from the line in question.
   2351     filename: The name of the current file.
   2352     linenum: The number of the line to check.
   2353     error: The function to call with any errors found.
   2354   """
   2355   match = _RE_PATTERN_TODO.match(comment)
   2356   if match:
   2357     # One whitespace is correct; zero whitespace is handled elsewhere.
   2358     leading_whitespace = match.group(1)
   2359     if len(leading_whitespace) > 1:
   2360       error(filename, linenum, 'whitespace/todo', 2,
   2361             'Too many spaces before TODO')
   2362 
   2363     username = match.group(2)
   2364     if not username:
   2365       error(filename, linenum, 'readability/todo', 2,
   2366             'Missing username in TODO; it should look like '
   2367             '"// TODO(my_username): Stuff."')
   2368 
   2369     middle_whitespace = match.group(3)
   2370     # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
   2371     if middle_whitespace != ' ' and middle_whitespace != '':
   2372       error(filename, linenum, 'whitespace/todo', 2,
   2373             'TODO(my_username) should be followed by a space')
   2374 
   2375 def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
   2376   """Checks for improper use of DISALLOW* macros.
   2377 
   2378   Args:
   2379     filename: The name of the current file.
   2380     clean_lines: A CleansedLines instance containing the file.
   2381     linenum: The number of the line to check.
   2382     nesting_state: A _NestingState instance which maintains information about
   2383                    the current stack of nested blocks being parsed.
   2384     error: The function to call with any errors found.
   2385   """
   2386   line = clean_lines.elided[linenum]  # get rid of comments and strings
   2387 
   2388   matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
   2389                    r'DISALLOW_EVIL_CONSTRUCTORS|'
   2390                    r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
   2391   if not matched:
   2392     return
   2393   if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
   2394     if nesting_state.stack[-1].access != 'private':
   2395       error(filename, linenum, 'readability/constructors', 3,
   2396             '%s must be in the private: section' % matched.group(1))
   2397 
   2398   else:
   2399     # Found DISALLOW* macro outside a class declaration, or perhaps it
   2400     # was used inside a function when it should have been part of the
   2401     # class declaration.  We could issue a warning here, but it
   2402     # probably resulted in a compiler error already.
   2403     pass
   2404 
   2405 
   2406 def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
   2407   """Find the corresponding > to close a template.
   2408 
   2409   Args:
   2410     clean_lines: A CleansedLines instance containing the file.
   2411     linenum: Current line number.
   2412     init_suffix: Remainder of the current line after the initial <.
   2413 
   2414   Returns:
   2415     True if a matching bracket exists.
   2416   """
   2417   line = init_suffix
   2418   nesting_stack = ['<']
   2419   while True:
   2420     # Find the next operator that can tell us whether < is used as an
   2421     # opening bracket or as a less-than operator.  We only want to
   2422     # warn on the latter case.
   2423     #
   2424     # We could also check all other operators and terminate the search
   2425     # early, e.g. if we got something like this "a<b+c", the "<" is
   2426     # most likely a less-than operator, but then we will get false
   2427     # positives for default arguments and other template expressions.
   2428     match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
   2429     if match:
   2430       # Found an operator, update nesting stack
   2431       operator = match.group(1)
   2432       line = match.group(2)
   2433 
   2434       if nesting_stack[-1] == '<':
   2435         # Expecting closing angle bracket
   2436         if operator in ('<', '(', '['):
   2437           nesting_stack.append(operator)
   2438         elif operator == '>':
   2439           nesting_stack.pop()
   2440           if not nesting_stack:
   2441             # Found matching angle bracket
   2442             return True
   2443         elif operator == ',':
   2444           # Got a comma after a bracket, this is most likely a template
   2445           # argument.  We have not seen a closing angle bracket yet, but
   2446           # it's probably a few lines later if we look for it, so just
   2447           # return early here.
   2448           return True
   2449         else:
   2450           # Got some other operator.
   2451           return False
   2452 
   2453       else:
   2454         # Expecting closing parenthesis or closing bracket
   2455         if operator in ('<', '(', '['):
   2456           nesting_stack.append(operator)
   2457         elif operator in (')', ']'):
   2458           # We don't bother checking for matching () or [].  If we got
   2459           # something like (] or [), it would have been a syntax error.
   2460           nesting_stack.pop()
   2461 
   2462     else:
   2463       # Scan the next line
   2464       linenum += 1
   2465       if linenum >= len(clean_lines.elided):
   2466         break
   2467       line = clean_lines.elided[linenum]
   2468 
   2469   # Exhausted all remaining lines and still no matching angle bracket.
   2470   # Most likely the input was incomplete, otherwise we should have
   2471   # seen a semicolon and returned early.
   2472   return True
   2473 
   2474 
   2475 def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
   2476   """Find the corresponding < that started a template.
   2477 
   2478   Args:
   2479     clean_lines: A CleansedLines instance containing the file.
   2480     linenum: Current line number.
   2481     init_prefix: Part of the current line before the initial >.
   2482 
   2483   Returns:
   2484     True if a matching bracket exists.
   2485   """
   2486   line = init_prefix
   2487   nesting_stack = ['>']
   2488   while True:
   2489     # Find the previous operator
   2490     match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line)
   2491     if match:
   2492       # Found an operator, update nesting stack
   2493       operator = match.group(2)
   2494       line = match.group(1)
   2495 
   2496       if nesting_stack[-1] == '>':
   2497         # Expecting opening angle bracket
   2498         if operator in ('>', ')', ']'):
   2499           nesting_stack.append(operator)
   2500         elif operator == '<':
   2501           nesting_stack.pop()
   2502           if not nesting_stack:
   2503             # Found matching angle bracket
   2504             return True
   2505         elif operator == ',':
   2506           # Got a comma before a bracket, this is most likely a
   2507           # template argument.  The opening angle bracket is probably
   2508           # there if we look for it, so just return early here.
   2509           return True
   2510         else:
   2511           # Got some other operator.
   2512           return False
   2513 
   2514       else:
   2515         # Expecting opening parenthesis or opening bracket
   2516         if operator in ('>', ')', ']'):
   2517           nesting_stack.append(operator)
   2518         elif operator in ('(', '['):
   2519           nesting_stack.pop()
   2520 
   2521     else:
   2522       # Scan the previous line
   2523       linenum -= 1
   2524       if linenum < 0:
   2525         break
   2526       line = clean_lines.elided[linenum]
   2527 
   2528   # Exhausted all earlier lines and still no matching angle bracket.
   2529   return False
   2530 
   2531 
   2532 def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
   2533   """Checks for the correctness of various spacing issues in the code.
   2534 
   2535   Things we check for: spaces around operators, spaces after
   2536   if/for/while/switch, no spaces around parens in function calls, two
   2537   spaces between code and comment, don't start a block with a blank
   2538   line, don't end a function with a blank line, don't add a blank line
   2539   after public/protected/private, don't have too many blank lines in a row.
   2540 
   2541   Args:
   2542     filename: The name of the current file.
   2543     clean_lines: A CleansedLines instance containing the file.
   2544     linenum: The number of the line to check.
   2545     nesting_state: A _NestingState instance which maintains information about
   2546                    the current stack of nested blocks being parsed.
   2547     error: The function to call with any errors found.
   2548   """
   2549 
   2550   # Don't use "elided" lines here, otherwise we can't check commented lines.
   2551   # Don't want to use "raw" either, because we don't want to check inside C++11
   2552   # raw strings,
   2553   raw = clean_lines.lines_without_raw_strings
   2554   line = raw[linenum]
   2555 
   2556   # Before nixing comments, check if the line is blank for no good
   2557   # reason.  This includes the first line after a block is opened, and
   2558   # blank lines at the end of a function (ie, right before a line like '}'
   2559   #
   2560   # Skip all the blank line checks if we are immediately inside a
   2561   # namespace body.  In other words, don't issue blank line warnings
   2562   # for this block:
   2563   #   namespace {
   2564   #
   2565   #   }
   2566   #
   2567   # A warning about missing end of namespace comments will be issued instead.
   2568   if IsBlankLine(line) and not nesting_state.InNamespaceBody():
   2569     elided = clean_lines.elided
   2570     prev_line = elided[linenum - 1]
   2571     prevbrace = prev_line.rfind('{')
   2572     # TODO(unknown): Don't complain if line before blank line, and line after,
   2573     #                both start with alnums and are indented the same amount.
   2574     #                This ignores whitespace at the start of a namespace block
   2575     #                because those are not usually indented.
   2576     if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
   2577       # OK, we have a blank line at the start of a code block.  Before we
   2578       # complain, we check if it is an exception to the rule: The previous
   2579       # non-empty line has the parameters of a function header that are indented
   2580       # 4 spaces (because they did not fit in a 80 column line when placed on
   2581       # the same line as the function name).  We also check for the case where
   2582       # the previous line is indented 6 spaces, which may happen when the
   2583       # initializers of a constructor do not fit into a 80 column line.
   2584       exception = False
   2585       if Match(r' {6}\w', prev_line):  # Initializer list?
   2586         # We are looking for the opening column of initializer list, which
   2587         # should be indented 4 spaces to cause 6 space indentation afterwards.
   2588         search_position = linenum-2
   2589         while (search_position >= 0
   2590                and Match(r' {6}\w', elided[search_position])):
   2591           search_position -= 1
   2592         exception = (search_position >= 0
   2593                      and elided[search_position][:5] == '    :')
   2594       else:
   2595         # Search for the function arguments or an initializer list.  We use a
   2596         # simple heuristic here: If the line is indented 4 spaces; and we have a
   2597         # closing paren, without the opening paren, followed by an opening brace
   2598         # or colon (for initializer lists) we assume that it is the last line of
   2599         # a function header.  If we have a colon indented 4 spaces, it is an
   2600         # initializer list.
   2601         exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   2602                            prev_line)
   2603                      or Match(r' {4}:', prev_line))
   2604 
   2605       if not exception:
   2606         error(filename, linenum, 'whitespace/blank_line', 2,
   2607               'Redundant blank line at the start of a code block '
   2608               'should be deleted.')
   2609     # Ignore blank lines at the end of a block in a long if-else
   2610     # chain, like this:
   2611     #   if (condition1) {
   2612     #     // Something followed by a blank line
   2613     #
   2614     #   } else if (condition2) {
   2615     #     // Something else
   2616     #   }
   2617     if linenum + 1 < clean_lines.NumLines():
   2618       next_line = raw[linenum + 1]
   2619       if (next_line
   2620           and Match(r'\s*}', next_line)
   2621           and next_line.find('} else ') == -1):
   2622         error(filename, linenum, 'whitespace/blank_line', 3,
   2623               'Redundant blank line at the end of a code block '
   2624               'should be deleted.')
   2625 
   2626     matched = Match(r'\s*(public|protected|private):', prev_line)
   2627     if matched:
   2628       error(filename, linenum, 'whitespace/blank_line', 3,
   2629             'Do not leave a blank line after "%s:"' % matched.group(1))
   2630 
   2631   # Next, we complain if there's a comment too near the text
   2632   commentpos = line.find('//')
   2633   if commentpos != -1:
   2634     # Check if the // may be in quotes.  If so, ignore it
   2635     # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison
   2636     if (line.count('"', 0, commentpos) -
   2637         line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
   2638       # Allow one space for new scopes, two spaces otherwise:
   2639       if (not Match(r'^\s*{ //', line) and
   2640           ((commentpos >= 1 and
   2641             line[commentpos-1] not in string.whitespace) or
   2642            (commentpos >= 2 and
   2643             line[commentpos-2] not in string.whitespace))):
   2644         error(filename, linenum, 'whitespace/comments', 2,
   2645               'At least two spaces is best between code and comments')
   2646       # There should always be a space between the // and the comment
   2647       commentend = commentpos + 2
   2648       if commentend < len(line) and not line[commentend] == ' ':
   2649         # but some lines are exceptions -- e.g. if they're big
   2650         # comment delimiters like:
   2651         # //----------------------------------------------------------
   2652         # or are an empty C++ style Doxygen comment, like:
   2653         # ///
   2654         # or C++ style Doxygen comments placed after the variable:
   2655         # ///<  Header comment
   2656         # //!<  Header comment
   2657         # or they begin with multiple slashes followed by a space:
   2658         # //////// Header comment
   2659         match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
   2660                  Search(r'^/$', line[commentend:]) or
   2661                  Search(r'^!< ', line[commentend:]) or
   2662                  Search(r'^/< ', line[commentend:]) or
   2663                  Search(r'^/+ ', line[commentend:]))
   2664         if not match:
   2665           error(filename, linenum, 'whitespace/comments', 4,
   2666                 'Should have a space between // and comment')
   2667       CheckComment(line[commentpos:], filename, linenum, error)
   2668 
   2669   line = clean_lines.elided[linenum]  # get rid of comments and strings
   2670 
   2671   # Don't try to do spacing checks for operator methods
   2672   line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
   2673 
   2674   # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
   2675   # Otherwise not.  Note we only check for non-spaces on *both* sides;
   2676   # sometimes people put non-spaces on one side when aligning ='s among
   2677   # many lines (not that this is behavior that I approve of...)
   2678   if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
   2679     error(filename, linenum, 'whitespace/operators', 4,
   2680           'Missing spaces around =')
   2681 
   2682   # It's ok not to have spaces around binary operators like + - * /, but if
   2683   # there's too little whitespace, we get concerned.  It's hard to tell,
   2684   # though, so we punt on this one for now.  TODO.
   2685 
   2686   # You should always have whitespace around binary operators.
   2687   #
   2688   # Check <= and >= first to avoid false positives with < and >, then
   2689   # check non-include lines for spacing around < and >.
   2690   match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
   2691   if match:
   2692     error(filename, linenum, 'whitespace/operators', 3,
   2693           'Missing spaces around %s' % match.group(1))
   2694   # We allow no-spaces around << when used like this: 10<<20, but
   2695   # not otherwise (particularly, not when used as streams)
   2696   # Also ignore using ns::operator<<;
   2697   match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
   2698   if (match and
   2699       not (match.group(1).isdigit() and match.group(2).isdigit()) and
   2700       not (match.group(1) == 'operator' and match.group(2) == ';')):
   2701     error(filename, linenum, 'whitespace/operators', 3,
   2702           'Missing spaces around <<')
   2703   elif not Match(r'#.*include', line):
   2704     # Avoid false positives on ->
   2705     reduced_line = line.replace('->', '')
   2706 
   2707     # Look for < that is not surrounded by spaces.  This is only
   2708     # triggered if both sides are missing spaces, even though
   2709     # technically should should flag if at least one side is missing a
   2710     # space.  This is done to avoid some false positives with shifts.
   2711     match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
   2712     if (match and
   2713         not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
   2714       error(filename, linenum, 'whitespace/operators', 3,
   2715             'Missing spaces around <')
   2716 
   2717     # Look for > that is not surrounded by spaces.  Similar to the
   2718     # above, we only trigger if both sides are missing spaces to avoid
   2719     # false positives with shifts.
   2720     match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
   2721     if (match and
   2722         not FindPreviousMatchingAngleBracket(clean_lines, linenum,
   2723                                              match.group(1))):
   2724       error(filename, linenum, 'whitespace/operators', 3,
   2725             'Missing spaces around >')
   2726 
   2727   # We allow no-spaces around >> for almost anything.  This is because
   2728   # C++11 allows ">>" to close nested templates, which accounts for
   2729   # most cases when ">>" is not followed by a space.
   2730   #
   2731   # We still warn on ">>" followed by alpha character, because that is
   2732   # likely due to ">>" being used for right shifts, e.g.:
   2733   #   value >> alpha
   2734   #
   2735   # When ">>" is used to close templates, the alphanumeric letter that
   2736   # follows would be part of an identifier, and there should still be
   2737   # a space separating the template type and the identifier.
   2738   #   type<type<type>> alpha
   2739   match = Search(r'>>[a-zA-Z_]', line)
   2740   if match:
   2741     error(filename, linenum, 'whitespace/operators', 3,
   2742           'Missing spaces around >>')
   2743 
   2744   # There shouldn't be space around unary operators
   2745   match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   2746   if match:
   2747     error(filename, linenum, 'whitespace/operators', 4,
   2748           'Extra space for operator %s' % match.group(1))
   2749 
   2750   # A pet peeve of mine: no spaces after an if, while, switch, or for
   2751   match = Search(r' (if\(|for\(|while\(|switch\()', line)
   2752   if match:
   2753     error(filename, linenum, 'whitespace/parens', 5,
   2754           'Missing space before ( in %s' % match.group(1))
   2755 
   2756   # For if/for/while/switch, the left and right parens should be
   2757   # consistent about how many spaces are inside the parens, and
   2758   # there should either be zero or one spaces inside the parens.
   2759   # We don't want: "if ( foo)" or "if ( foo   )".
   2760   # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   2761   match = Search(r'\b(if|for|while|switch)\s*'
   2762                  r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
   2763                  line)
   2764   if match:
   2765     if len(match.group(2)) != len(match.group(4)):
   2766       if not (match.group(3) == ';' and
   2767               len(match.group(2)) == 1 + len(match.group(4)) or
   2768               not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
   2769         error(filename, linenum, 'whitespace/parens', 5,
   2770               'Mismatching spaces inside () in %s' % match.group(1))
   2771     if len(match.group(2)) not in [0, 1]:
   2772       error(filename, linenum, 'whitespace/parens', 5,
   2773             'Should have zero or one spaces inside ( and ) in %s' %
   2774             match.group(1))
   2775 
   2776   # You should always have a space after a comma (either as fn arg or operator)
   2777   #
   2778   # This does not apply when the non-space character following the
   2779   # comma is another comma, since the only time when that happens is
   2780   # for empty macro arguments.
   2781   #
   2782   # We run this check in two passes: first pass on elided lines to
   2783   # verify that lines contain missing whitespaces, second pass on raw
   2784   # lines to confirm that those missing whitespaces are not due to
   2785   # elided comments.
   2786   if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]):
   2787     error(filename, linenum, 'whitespace/comma', 3,
   2788           'Missing space after ,')
   2789 
   2790   # You should always have a space after a semicolon
   2791   # except for few corner cases
   2792   # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
   2793   # space after ;
   2794   if Search(r';[^\s};\\)/]', line):
   2795     error(filename, linenum, 'whitespace/semicolon', 3,
   2796           'Missing space after ;')
   2797 
   2798   # Next we will look for issues with function calls.
   2799   CheckSpacingForFunctionCall(filename, line, linenum, error)
   2800 
   2801   # Except after an opening paren, or after another opening brace (in case of
   2802   # an initializer list, for instance), you should have spaces before your
   2803   # braces. And since you should never have braces at the beginning of a line,
   2804   # this is an easy test.
   2805   match = Match(r'^(.*[^ ({]){', line)
   2806   if match:
   2807     # Try a bit harder to check for brace initialization.  This
   2808     # happens in one of the following forms:
   2809     #   Constructor() : initializer_list_{} { ... }
   2810     #   Constructor{}.MemberFunction()
   2811     #   Type variable{};
   2812     #   FunctionCall(type{}, ...);
   2813     #   LastArgument(..., type{});
   2814     #   LOG(INFO) << type{} << " ...";
   2815     #   map_of_type[{...}] = ...;
   2816     #
   2817     # We check for the character following the closing brace, and
   2818     # silence the warning if it's one of those listed above, i.e.
   2819     # "{.;,)<]".
   2820     #
   2821     # To account for nested initializer list, we allow any number of
   2822     # closing braces up to "{;,)<".  We can't simply silence the
   2823     # warning on first sight of closing brace, because that would
   2824     # cause false negatives for things that are not initializer lists.
   2825     #   Silence this:         But not this:
   2826     #     Outer{                if (...) {
   2827     #       Inner{...}            if (...){  // Missing space before {
   2828     #     };                    }
   2829     #
   2830     # There is a false negative with this approach if people inserted
   2831     # spurious semicolons, e.g. "if (cond){};", but we will catch the
   2832     # spurious semicolon with a separate check.
   2833     (endline, endlinenum, endpos) = CloseExpression(
   2834         clean_lines, linenum, len(match.group(1)))
   2835     trailing_text = ''
   2836     if endpos > -1:
   2837       trailing_text = endline[endpos:]
   2838     for offset in xrange(endlinenum + 1,
   2839                          min(endlinenum + 3, clean_lines.NumLines() - 1)):
   2840       trailing_text += clean_lines.elided[offset]
   2841     if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text):
   2842       error(filename, linenum, 'whitespace/braces', 5,
   2843             'Missing space before {')
   2844 
   2845   # Make sure '} else {' has spaces.
   2846   if Search(r'}else', line):
   2847     error(filename, linenum, 'whitespace/braces', 5,
   2848           'Missing space before else')
   2849 
   2850   # You shouldn't have spaces before your brackets, except maybe after
   2851   # 'delete []' or 'new char * []'.
   2852   if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
   2853     error(filename, linenum, 'whitespace/braces', 5,
   2854           'Extra space before [')
   2855 
   2856   # You shouldn't have a space before a semicolon at the end of the line.
   2857   # There's a special case for "for" since the style guide allows space before
   2858   # the semicolon there.
   2859   if Search(r':\s*;\s*$', line):
   2860     error(filename, linenum, 'whitespace/semicolon', 5,
   2861           'Semicolon defining empty statement. Use {} instead.')
   2862   elif Search(r'^\s*;\s*$', line):
   2863     error(filename, linenum, 'whitespace/semicolon', 5,
   2864           'Line contains only semicolon. If this should be an empty statement, '
   2865           'use {} instead.')
   2866   elif (Search(r'\s+;\s*$', line) and
   2867         not Search(r'\bfor\b', line)):
   2868     error(filename, linenum, 'whitespace/semicolon', 5,
   2869           'Extra space before last semicolon. If this should be an empty '
   2870           'statement, use {} instead.')
   2871 
   2872   # In range-based for, we wanted spaces before and after the colon, but
   2873   # not around "::" tokens that might appear.
   2874   if (Search('for *\(.*[^:]:[^: ]', line) or
   2875       Search('for *\(.*[^: ]:[^:]', line)):
   2876     error(filename, linenum, 'whitespace/forcolon', 2,
   2877           'Missing space around colon in range-based for loop')
   2878 
   2879 
   2880 def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
   2881   """Checks for additional blank line issues related to sections.
   2882 
   2883   Currently the only thing checked here is blank line before protected/private.
   2884 
   2885   Args:
   2886     filename: The name of the current file.
   2887     clean_lines: A CleansedLines instance containing the file.
   2888     class_info: A _ClassInfo objects.
   2889     linenum: The number of the line to check.
   2890     error: The function to call with any errors found.
   2891   """
   2892   # Skip checks if the class is small, where small means 25 lines or less.
   2893   # 25 lines seems like a good cutoff since that's the usual height of
   2894   # terminals, and any class that can't fit in one screen can't really
   2895   # be considered "small".
   2896   #
   2897   # Also skip checks if we are on the first line.  This accounts for
   2898   # classes that look like
   2899   #   class Foo { public: ... };
   2900   #
   2901   # If we didn't find the end of the class, last_line would be zero,
   2902   # and the check will be skipped by the first condition.
   2903   if (class_info.last_line - class_info.starting_linenum <= 24 or
   2904       linenum <= class_info.starting_linenum):
   2905     return
   2906 
   2907   matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
   2908   if matched:
   2909     # Issue warning if the line before public/protected/private was
   2910     # not a blank line, but don't do this if the previous line contains
   2911     # "class" or "struct".  This can happen two ways:
   2912     #  - We are at the beginning of the class.
   2913     #  - We are forward-declaring an inner class that is semantically
   2914     #    private, but needed to be public for implementation reasons.
   2915     # Also ignores cases where the previous line ends with a backslash as can be
   2916     # common when defining classes in C macros.
   2917     prev_line = clean_lines.lines[linenum - 1]
   2918     if (not IsBlankLine(prev_line) and
   2919         not Search(r'\b(class|struct)\b', prev_line) and
   2920         not Search(r'\\$', prev_line)):
   2921       # Try a bit harder to find the beginning of the class.  This is to
   2922       # account for multi-line base-specifier lists, e.g.:
   2923       #   class Derived
   2924       #       : public Base {
   2925       end_class_head = class_info.starting_linenum
   2926       for i in range(class_info.starting_linenum, linenum):
   2927         if Search(r'\{\s*$', clean_lines.lines[i]):
   2928           end_class_head = i
   2929           break
   2930       if end_class_head < linenum - 1:
   2931         error(filename, linenum, 'whitespace/blank_line', 3,
   2932               '"%s:" should be preceded by a blank line' % matched.group(1))
   2933 
   2934 
   2935 def GetPreviousNonBlankLine(clean_lines, linenum):
   2936   """Return the most recent non-blank line and its line number.
   2937 
   2938   Args:
   2939     clean_lines: A CleansedLines instance containing the file contents.
   2940     linenum: The number of the line to check.
   2941 
   2942   Returns:
   2943     A tuple with two elements.  The first element is the contents of the last
   2944     non-blank line before the current line, or the empty string if this is the
   2945     first non-blank line.  The second is the line number of that line, or -1
   2946     if this is the first non-blank line.
   2947   """
   2948 
   2949   prevlinenum = linenum - 1
   2950   while prevlinenum >= 0:
   2951     prevline = clean_lines.elided[prevlinenum]
   2952     if not IsBlankLine(prevline):     # if not a blank line...
   2953       return (prevline, prevlinenum)
   2954     prevlinenum -= 1
   2955   return ('', -1)
   2956 
   2957 
   2958 def CheckBraces(filename, clean_lines, linenum, error):
   2959   """Looks for misplaced braces (e.g. at the end of line).
   2960 
   2961   Args:
   2962     filename: The name of the current file.
   2963     clean_lines: A CleansedLines instance containing the file.
   2964     linenum: The number of the line to check.
   2965     error: The function to call with any errors found.
   2966   """
   2967 
   2968   line = clean_lines.elided[linenum]        # get rid of comments and strings
   2969 
   2970   if Match(r'\s*{\s*$', line):
   2971     # We allow an open brace to start a line in the case where someone is using
   2972     # braces in a block to explicitly create a new scope, which is commonly used
   2973     # to control the lifetime of stack-allocated variables.  Braces are also
   2974     # used for brace initializers inside function calls.  We don't detect this
   2975     # perfectly: we just don't complain if the last non-whitespace character on
   2976     # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
   2977     # previous line starts a preprocessor block.
   2978     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   2979     if (not Search(r'[,;:}{(]\s*$', prevline) and
   2980         not Match(r'\s*#', prevline)):
   2981       error(filename, linenum, 'whitespace/braces', 4,
   2982             '{ should almost always be at the end of the previous line')
   2983 
   2984   # An else clause should be on the same line as the preceding closing brace.
   2985   if Match(r'\s*else\s*', line):
   2986     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   2987     if Match(r'\s*}\s*$', prevline):
   2988       error(filename, linenum, 'whitespace/newline', 4,
   2989             'An else should appear on the same line as the preceding }')
   2990 
   2991   # If braces come on one side of an else, they should be on both.
   2992   # However, we have to worry about "else if" that spans multiple lines!
   2993   if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
   2994     if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
   2995       # find the ( after the if
   2996       pos = line.find('else if')
   2997       pos = line.find('(', pos)
   2998       if pos > 0:
   2999         (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
   3000         if endline[endpos:].find('{') == -1:    # must be brace after if
   3001           error(filename, linenum, 'readability/braces', 5,
   3002                 'If an else has a brace on one side, it should have it on both')
   3003     else:            # common case: else not followed by a multi-line if
   3004       error(filename, linenum, 'readability/braces', 5,
   3005             'If an else has a brace on one side, it should have it on both')
   3006 
   3007   # Likewise, an else should never have the else clause on the same line
   3008   if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
   3009     error(filename, linenum, 'whitespace/newline', 4,
   3010           'Else clause should never be on same line as else (use 2 lines)')
   3011 
   3012   # In the same way, a do/while should never be on one line
   3013   if Match(r'\s*do [^\s{]', line):
   3014     error(filename, linenum, 'whitespace/newline', 4,
   3015           'do/while clauses should not be on a single line')
   3016 
   3017   # Block bodies should not be followed by a semicolon.  Due to C++11
   3018   # brace initialization, there are more places where semicolons are
   3019   # required than not, so we use a whitelist approach to check these
   3020   # rather than a blacklist.  These are the places where "};" should
   3021   # be replaced by just "}":
   3022   # 1. Some flavor of block following closing parenthesis:
   3023   #    for (;;) {};
   3024   #    while (...) {};
   3025   #    switch (...) {};
   3026   #    Function(...) {};
   3027   #    if (...) {};
   3028   #    if (...) else if (...) {};
   3029   #
   3030   # 2. else block:
   3031   #    if (...) else {};
   3032   #
   3033   # 3. const member function:
   3034   #    Function(...) const {};
   3035   #
   3036   # 4. Block following some statement:
   3037   #    x = 42;
   3038   #    {};
   3039   #
   3040   # 5. Block at the beginning of a function:
   3041   #    Function(...) {
   3042   #      {};
   3043   #    }
   3044   #
   3045   #    Note that naively checking for the preceding "{" will also match
   3046   #    braces inside multi-dimensional arrays, but this is fine since
   3047   #    that expression will not contain semicolons.
   3048   #
   3049   # 6. Block following another block:
   3050   #    while (true) {}
   3051   #    {};
   3052   #
   3053   # 7. End of namespaces:
   3054   #    namespace {};
   3055   #
   3056   #    These semicolons seems far more common than other kinds of
   3057   #    redundant semicolons, possibly due to people converting classes
   3058   #    to namespaces.  For now we do not warn for this case.
   3059   #
   3060   # Try matching case 1 first.
   3061   match = Match(r'^(.*\)\s*)\{', line)
   3062   if match:
   3063     # Matched closing parenthesis (case 1).  Check the token before the
   3064     # matching opening parenthesis, and don't warn if it looks like a
   3065     # macro.  This avoids these false positives:
   3066     #  - macro that defines a base class
   3067     #  - multi-line macro that defines a base class
   3068     #  - macro that defines the whole class-head
   3069     #
   3070     # But we still issue warnings for macros that we know are safe to
   3071     # warn, specifically:
   3072     #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
   3073     #  - TYPED_TEST
   3074     #  - INTERFACE_DEF
   3075     #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
   3076     #
   3077     # We implement a whitelist of safe macros instead of a blacklist of
   3078     # unsafe macros, even though the latter appears less frequently in
   3079     # google code and would have been easier to implement.  This is because
   3080     # the downside for getting the whitelist wrong means some extra
   3081     # semicolons, while the downside for getting the blacklist wrong
   3082     # would result in compile errors.
   3083     #
   3084     # In addition to macros, we also don't want to warn on compound
   3085     # literals.
   3086     closing_brace_pos = match.group(1).rfind(')')
   3087     opening_parenthesis = ReverseCloseExpression(
   3088         clean_lines, linenum, closing_brace_pos)
   3089     if opening_parenthesis[2] > -1:
   3090       line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
   3091       macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
   3092       if ((macro and
   3093            macro.group(1) not in (
   3094                'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
   3095                'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
   3096                'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
   3097           Search(r'\s+=\s*$', line_prefix)):
   3098         match = None
   3099 
   3100   else:
   3101     # Try matching cases 2-3.
   3102     match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
   3103     if not match:
   3104       # Try matching cases 4-6.  These are always matched on separate lines.
   3105       #
   3106       # Note that we can't simply concatenate the previous line to the
   3107       # current line and do a single match, otherwise we may output
   3108       # duplicate warnings for the blank line case:
   3109       #   if (cond) {
   3110       #     // blank line
   3111       #   }
   3112       prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   3113       if prevline and Search(r'[;{}]\s*$', prevline):
   3114         match = Match(r'^(\s*)\{', line)
   3115 
   3116   # Check matching closing brace
   3117   if match:
   3118     (endline, endlinenum, endpos) = CloseExpression(
   3119         clean_lines, linenum, len(match.group(1)))
   3120     if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
   3121       # Current {} pair is eligible for semicolon check, and we have found
   3122       # the redundant semicolon, output warning here.
   3123       #
   3124       # Note: because we are scanning forward for opening braces, and
   3125       # outputting warnings for the matching closing brace, if there are
   3126       # nested blocks with trailing semicolons, we will get the error
   3127       # messages in reversed order.
   3128       error(filename, endlinenum, 'readability/braces', 4,
   3129             "You don't need a ; after a }")
   3130 
   3131 
   3132 def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
   3133   """Look for empty loop/conditional body with only a single semicolon.
   3134 
   3135   Args:
   3136     filename: The name of the current file.
   3137     clean_lines: A CleansedLines instance containing the file.
   3138     linenum: The number of the line to check.
   3139     error: The function to call with any errors found.
   3140   """
   3141 
   3142   # Search for loop keywords at the beginning of the line.  Because only
   3143   # whitespaces are allowed before the keywords, this will also ignore most
   3144   # do-while-loops, since those lines should start with closing brace.
   3145   #
   3146   # We also check "if" blocks here, since an empty conditional block
   3147   # is likely an error.
   3148   line = clean_lines.elided[linenum]
   3149   matched = Match(r'\s*(for|while|if)\s*\(', line)
   3150   if matched:
   3151     # Find the end of the conditional expression
   3152     (end_line, end_linenum, end_pos) = CloseExpression(
   3153         clean_lines, linenum, line.find('('))
   3154 
   3155     # Output warning if what follows the condition expression is a semicolon.
   3156     # No warning for all other cases, including whitespace or newline, since we
   3157     # have a separate check for semicolons preceded by whitespace.
   3158     if end_pos >= 0 and Match(r';', end_line[end_pos:]):
   3159       if matched.group(1) == 'if':
   3160         error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
   3161               'Empty conditional bodies should use {}')
   3162       else:
   3163         error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
   3164               'Empty loop bodies should use {} or continue')
   3165 
   3166 
   3167 def CheckCheck(filename, clean_lines, linenum, error):
   3168   """Checks the use of CHECK and EXPECT macros.
   3169 
   3170   Args:
   3171     filename: The name of the current file.
   3172     clean_lines: A CleansedLines instance containing the file.
   3173     linenum: The number of the line to check.
   3174     error: The function to call with any errors found.
   3175   """
   3176 
   3177   # Decide the set of replacement macros that should be suggested
   3178   lines = clean_lines.elided
   3179   check_macro = None
   3180   start_pos = -1
   3181   for macro in _CHECK_MACROS:
   3182     i = lines[linenum].find(macro)
   3183     if i >= 0:
   3184       check_macro = macro
   3185 
   3186       # Find opening parenthesis.  Do a regular expression match here
   3187       # to make sure that we are matching the expected CHECK macro, as
   3188       # opposed to some other macro that happens to contain the CHECK
   3189       # substring.
   3190       matched = Match(r'^(.*\b' + check_macro + r'\s*)\(', lines[linenum])
   3191       if not matched:
   3192         continue
   3193       start_pos = len(matched.group(1))
   3194       break
   3195   if not check_macro or start_pos < 0:
   3196     # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   3197     return
   3198 
   3199   # Find end of the boolean expression by matching parentheses
   3200   (last_line, end_line, end_pos) = CloseExpression(
   3201       clean_lines, linenum, start_pos)
   3202   if end_pos < 0:
   3203     return
   3204   if linenum == end_line:
   3205     expression = lines[linenum][start_pos + 1:end_pos - 1]
   3206   else:
   3207     expression = lines[linenum][start_pos + 1:]
   3208     for i in xrange(linenum + 1, end_line):
   3209       expression += lines[i]
   3210     expression += last_line[0:end_pos - 1]
   3211 
   3212   # Parse expression so that we can take parentheses into account.
   3213   # This avoids false positives for inputs like "CHECK((a < 4) == b)",
   3214   # which is not replaceable by CHECK_LE.
   3215   lhs = ''
   3216   rhs = ''
   3217   operator = None
   3218   while expression:
   3219     matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
   3220                     r'==|!=|>=|>|<=|<|\()(.*)$', expression)
   3221     if matched:
   3222       token = matched.group(1)
   3223       if token == '(':
   3224         # Parenthesized operand
   3225         expression = matched.group(2)
   3226         (end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
   3227         if end < 0:
   3228           return  # Unmatched parenthesis
   3229         lhs += '(' + expression[0:end]
   3230         expression = expression[end:]
   3231       elif token in ('&&', '||'):
   3232         # Logical and/or operators.  This means the expression
   3233         # contains more than one term, for example:
   3234         #   CHECK(42 < a && a < b);
   3235         #
   3236         # These are not replaceable with CHECK_LE, so bail out early.
   3237         return
   3238       elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
   3239         # Non-relational operator
   3240         lhs += token
   3241         expression = matched.group(2)
   3242       else:
   3243         # Relational operator
   3244         operator = token
   3245         rhs = matched.group(2)
   3246         break
   3247     else:
   3248       # Unparenthesized operand.  Instead of appending to lhs one character
   3249       # at a time, we do another regular expression match to consume several
   3250       # characters at once if possible.  Trivial benchmark shows that this
   3251       # is more efficient when the operands are longer than a single
   3252       # character, which is generally the case.
   3253       matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
   3254       if not matched:
   3255         matched = Match(r'^(\s*\S)(.*)$', expression)
   3256         if not matched:
   3257           break
   3258       lhs += matched.group(1)
   3259       expression = matched.group(2)
   3260 
   3261   # Only apply checks if we got all parts of the boolean expression
   3262   if not (lhs and operator and rhs):
   3263     return
   3264 
   3265   # Check that rhs do not contain logical operators.  We already know
   3266   # that lhs is fine since the loop above parses out && and ||.
   3267   if rhs.find('&&') > -1 or rhs.find('||') > -1:
   3268     return
   3269 
   3270   # At least one of the operands must be a constant literal.  This is
   3271   # to avoid suggesting replacements for unprintable things like
   3272   # CHECK(variable != iterator)
   3273   #
   3274   # The following pattern matches decimal, hex integers, strings, and
   3275   # characters (in that order).
   3276   lhs = lhs.strip()
   3277   rhs = rhs.strip()
   3278   match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
   3279   if Match(match_constant, lhs) or Match(match_constant, rhs):
   3280     # Note: since we know both lhs and rhs, we can provide a more
   3281     # descriptive error message like:
   3282     #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
   3283     # Instead of:
   3284     #   Consider using CHECK_EQ instead of CHECK(a == b)
   3285     #
   3286     # We are still keeping the less descriptive message because if lhs
   3287     # or rhs gets long, the error message might become unreadable.
   3288     error(filename, linenum, 'readability/check', 2,
   3289           'Consider using %s instead of %s(a %s b)' % (
   3290               _CHECK_REPLACEMENT[check_macro][operator],
   3291               check_macro, operator))
   3292 
   3293 
   3294 def CheckAltTokens(filename, clean_lines, linenum, error):
   3295   """Check alternative keywords being used in boolean expressions.
   3296 
   3297   Args:
   3298     filename: The name of the current file.
   3299     clean_lines: A CleansedLines instance containing the file.
   3300     linenum: The number of the line to check.
   3301     error: The function to call with any errors found.
   3302   """
   3303   line = clean_lines.elided[linenum]
   3304 
   3305   # Avoid preprocessor lines
   3306   if Match(r'^\s*#', line):
   3307     return
   3308 
   3309   # Last ditch effort to avoid multi-line comments.  This will not help
   3310   # if the comment started before the current line or ended after the
   3311   # current line, but it catches most of the false positives.  At least,
   3312   # it provides a way to workaround this warning for people who use
   3313   # multi-line comments in preprocessor macros.
   3314   #
   3315   # TODO(unknown): remove this once cpplint has better support for
   3316   # multi-line comments.
   3317   if line.find('/*') >= 0 or line.find('*/') >= 0:
   3318     return
   3319 
   3320   for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
   3321     error(filename, linenum, 'readability/alt_tokens', 2,
   3322           'Use operator %s instead of %s' % (
   3323               _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
   3324 
   3325 
   3326 def GetLineWidth(line):
   3327   """Determines the width of the line in column positions.
   3328 
   3329   Args:
   3330     line: A string, which may be a Unicode string.
   3331 
   3332   Returns:
   3333     The width of the line in column positions, accounting for Unicode
   3334     combining characters and wide characters.
   3335   """
   3336   if isinstance(line, unicode):
   3337     width = 0
   3338     for uc in unicodedata.normalize('NFC', line):
   3339       if unicodedata.east_asian_width(uc) in ('W', 'F'):
   3340         width += 2
   3341       elif not unicodedata.combining(uc):
   3342         width += 1
   3343     return width
   3344   else:
   3345     return len(line)
   3346 
   3347 
   3348 def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
   3349                error):
   3350   """Checks rules from the 'C++ style rules' section of cppguide.html.
   3351 
   3352   Most of these rules are hard to test (naming, comment style), but we
   3353   do what we can.  In particular we check for 2-space indents, line lengths,
   3354   tab usage, spaces inside code, etc.
   3355 
   3356   Args:
   3357     filename: The name of the current file.
   3358     clean_lines: A CleansedLines instance containing the file.
   3359     linenum: The number of the line to check.
   3360     file_extension: The extension (without the dot) of the filename.
   3361     nesting_state: A _NestingState instance which maintains information about
   3362                    the current stack of nested blocks being parsed.
   3363     error: The function to call with any errors found.
   3364   """
   3365 
   3366   # Don't use "elided" lines here, otherwise we can't check commented lines.
   3367   # Don't want to use "raw" either, because we don't want to check inside C++11
   3368   # raw strings,
   3369   raw_lines = clean_lines.lines_without_raw_strings
   3370   line = raw_lines[linenum]
   3371 
   3372   if line.find('\t') != -1:
   3373     error(filename, linenum, 'whitespace/tab', 1,
   3374           'Tab found; better to use spaces')
   3375 
   3376   # One or three blank spaces at the beginning of the line is weird; it's
   3377   # hard to reconcile that with 2-space indents.
   3378   # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
   3379   # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
   3380   # if(RLENGTH > 20) complain = 0;
   3381   # if(match($0, " +(error|private|public|protected):")) complain = 0;
   3382   # if(match(prev, "&& *$")) complain = 0;
   3383   # if(match(prev, "\\|\\| *$")) complain = 0;
   3384   # if(match(prev, "[\",=><] *$")) complain = 0;
   3385   # if(match($0, " <<")) complain = 0;
   3386   # if(match(prev, " +for \\(")) complain = 0;
   3387   # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
   3388   initial_spaces = 0
   3389   cleansed_line = clean_lines.elided[linenum]
   3390   while initial_spaces < len(line) and line[initial_spaces] == ' ':
   3391     initial_spaces += 1
   3392   if line and line[-1].isspace():
   3393     error(filename, linenum, 'whitespace/end_of_line', 4,
   3394           'Line ends in whitespace.  Consider deleting these extra spaces.')
   3395   # There are certain situations we allow one space, notably for section labels
   3396   elif ((initial_spaces == 1 or initial_spaces == 3) and
   3397         not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
   3398     error(filename, linenum, 'whitespace/indent', 3,
   3399           'Weird number of spaces at line-start.  '
   3400           'Are you using a 2-space indent?')
   3401 
   3402   # Check if the line is a header guard.
   3403   is_header_guard = False
   3404   if file_extension == 'h':
   3405     cppvar = GetHeaderGuardCPPVariable(filename)
   3406     if (line.startswith('#ifndef %s' % cppvar) or
   3407         line.startswith('#define %s' % cppvar) or
   3408         line.startswith('#endif  // %s' % cppvar)):
   3409       is_header_guard = True
   3410   # #include lines and header guards can be long, since there's no clean way to
   3411   # split them.
   3412   #
   3413   # URLs can be long too.  It's possible to split these, but it makes them
   3414   # harder to cut&paste.
   3415   #
   3416   # The "$Id:...$" comment may also get very long without it being the
   3417   # developers fault.
   3418   if (not line.startswith('#include') and not is_header_guard and
   3419       not Match(r'^\s*//.*http(s?)://\S*$', line) and
   3420       not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
   3421     line_width = GetLineWidth(line)
   3422     extended_length = int((_line_length * 1.25))
   3423     if line_width > extended_length:
   3424       error(filename, linenum, 'whitespace/line_length', 4,
   3425             'Lines should very rarely be longer than %i characters' %
   3426             extended_length)
   3427     elif line_width > _line_length:
   3428       error(filename, linenum, 'whitespace/line_length', 2,
   3429             'Lines should be <= %i characters long' % _line_length)
   3430 
   3431   if (cleansed_line.count(';') > 1 and
   3432       # for loops are allowed two ;'s (and may run over two lines).
   3433       cleansed_line.find('for') == -1 and
   3434       (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
   3435        GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
   3436       # It's ok to have many commands in a switch case that fits in 1 line
   3437       not ((cleansed_line.find('case ') != -1 or
   3438             cleansed_line.find('default:') != -1) and
   3439            cleansed_line.find('break;') != -1)):
   3440     error(filename, linenum, 'whitespace/newline', 0,
   3441           'More than one command on the same line')
   3442 
   3443   # Some more style checks
   3444   CheckBraces(filename, clean_lines, linenum, error)
   3445   CheckEmptyBlockBody(filename, clean_lines, linenum, error)
   3446   CheckAccess(filename, clean_lines, linenum, nesting_state, error)
   3447   CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
   3448   CheckCheck(filename, clean_lines, linenum, error)
   3449   CheckAltTokens(filename, clean_lines, linenum, error)
   3450   classinfo = nesting_state.InnermostClass()
   3451   if classinfo:
   3452     CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
   3453 
   3454 
# Matches '#include "name.h"' where the quoted name contains no '/'.  The
# pattern itself is not anchored to the start of the line.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter ('<' or
# '"') and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   3463 
   3464 
   3465 def _DropCommonSuffixes(filename):
   3466   """Drops common suffixes like _test.cc or -inl.h from filename.
   3467 
   3468   For example:
   3469     >>> _DropCommonSuffixes('foo/foo-inl.h')
   3470     'foo/foo'
   3471     >>> _DropCommonSuffixes('foo/bar/foo.cc')
   3472     'foo/bar/foo'
   3473     >>> _DropCommonSuffixes('foo/foo_internal.h')
   3474     'foo/foo'
   3475     >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
   3476     'foo/foo_unusualinternal'
   3477 
   3478   Args:
   3479     filename: The input filename.
   3480 
   3481   Returns:
   3482     The filename with the common suffix removed.
   3483   """
   3484   for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
   3485                  'inl.h', 'impl.h', 'internal.h'):
   3486     if (filename.endswith(suffix) and len(filename) > len(suffix) and
   3487         filename[-len(suffix) - 1] in ('-', '_')):
   3488       return filename[:-len(suffix) - 1]
   3489   return os.path.splitext(filename)[0]
   3490 
   3491 
   3492 def _IsTestFilename(filename):
   3493   """Determines if the given filename has a suffix that identifies it as a test.
   3494 
   3495   Args:
   3496     filename: The input filename.
   3497 
   3498   Returns:
   3499     True if 'filename' looks like a test, False otherwise.
   3500   """
   3501   if (filename.endswith('_test.cc') or
   3502       filename.endswith('_unittest.cc') or
   3503       filename.endswith('_regtest.cc')):
   3504     return True
   3505   else:
   3506     return False
   3507 
   3508 
   3509 def _ClassifyInclude(fileinfo, include, is_system):
   3510   """Figures out what kind of header 'include' is.
   3511 
   3512   Args:
   3513     fileinfo: The current file cpplint is running over. A FileInfo instance.
   3514     include: The path to a #included file.
   3515     is_system: True if the #include used <> rather than "".
   3516 
   3517   Returns:
   3518     One of the _XXX_HEADER constants.
   3519 
   3520   For example:
   3521     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
   3522     _C_SYS_HEADER
   3523     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
   3524     _CPP_SYS_HEADER
   3525     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
   3526     _LIKELY_MY_HEADER
   3527     >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
   3528     ...                  'bar/foo_other_ext.h', False)
   3529     _POSSIBLE_MY_HEADER
   3530     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
   3531     _OTHER_HEADER
   3532   """
   3533   # This is a list of all standard c++ header files, except
   3534   # those already checked for above.
   3535   is_cpp_h = include in _CPP_HEADERS
   3536 
   3537   if is_system:
   3538     if is_cpp_h:
   3539       return _CPP_SYS_HEADER
   3540     else:
   3541       return _C_SYS_HEADER
   3542 
   3543   # If the target file and the include we're checking share a
   3544   # basename when we drop common extensions, and the include
   3545   # lives in . , then it's likely to be owned by the target file.
   3546   target_dir, target_base = (
   3547       os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
   3548   include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
   3549   if target_base == include_base and (
   3550       include_dir == target_dir or
   3551       include_dir == os.path.normpath(target_dir + '/../public')):
   3552     return _LIKELY_MY_HEADER
   3553 
   3554   # If the target and include share some initial basename
   3555   # component, it's possible the target is implementing the
   3556   # include, so it's allowed to be first, but we'll never
   3557   # complain if it's not there.
   3558   target_first_component = _RE_FIRST_COMPONENT.match(target_base)
   3559   include_first_component = _RE_FIRST_COMPONENT.match(include_base)
   3560   if (target_first_component and include_first_component and
   3561       target_first_component.group(0) ==
   3562       include_first_component.group(0)):
   3563     return _POSSIBLE_MY_HEADER
   3564 
   3565   return _OTHER_HEADER
   3566 
   3567 
   3568 
   3569 def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
   3570   """Check rules that are applicable to #include lines.
   3571 
   3572   Strings on #include lines are NOT removed from elided line, to make
   3573   certain tasks easier. However, to prevent false positives, checks
   3574   applicable to #include lines in CheckLanguage must be put here.
   3575 
   3576   Args:
   3577     filename: The name of the current file.
   3578     clean_lines: A CleansedLines instance containing the file.
   3579     linenum: The number of the line to check.
   3580     include_state: An _IncludeState instance in which the headers are inserted.
   3581     error: The function to call with any errors found.
   3582   """
   3583   fileinfo = FileInfo(filename)
   3584 
   3585   line = clean_lines.lines[linenum]
   3586 
   3587   # "include" should use the new style "foo/bar.h" instead of just "bar.h"
   3588   if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
   3589     error(filename, linenum, 'build/include', 4,
   3590           'Include the directory when naming .h files')
   3591 
   3592   # we shouldn't include a file more than once. actually, there are a
   3593   # handful of instances where doing so is okay, but in general it's
   3594   # not.
   3595   match = _RE_PATTERN_INCLUDE.search(line)
   3596   if match:
   3597     include = match.group(2)
   3598     is_system = (match.group(1) == '<')
   3599     if include in include_state:
   3600       error(filename, linenum, 'build/include', 4,
   3601             '"%s" already included at %s:%s' %
   3602             (include, filename, include_state[include]))
   3603     else:
   3604       include_state[include] = linenum
   3605 
   3606       # We want to ensure that headers appear in the right order:
   3607       # 1) for foo.cc, foo.h  (preferred location)
   3608       # 2) c system files
   3609       # 3) cpp system files
   3610       # 4) for foo.cc, foo.h  (deprecated location)
   3611       # 5) other google headers
   3612       #
   3613       # We classify each include statement as one of those 5 types
   3614       # using a number of techniques. The include_state object keeps
   3615       # track of the highest type seen, and complains if we see a
   3616       # lower type after that.
   3617       error_message = include_state.CheckNextIncludeOrder(
   3618           _ClassifyInclude(fileinfo, include, is_system))
   3619       if error_message:
   3620         error(filename, linenum, 'build/include_order', 4,
   3621               '%s. Should be: %s.h, c system, c++ system, other.' %
   3622               (error_message, fileinfo.BaseName()))
   3623       canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
   3624       if not include_state.IsInAlphabeticalOrder(
   3625           clean_lines, linenum, canonical_include):
   3626         error(filename, linenum, 'build/include_alpha', 4,
   3627               'Include "%s" not in alphabetical order' % include)
   3628       include_state.SetLastHeader(canonical_include)
   3629 
   3630   # Look for any of the stream classes that are part of standard C++.
   3631   match = _RE_PATTERN_INCLUDE.match(line)
   3632   if match:
   3633     include = match.group(2)
   3634     if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   3635       # Many unit tests use cout, so we exempt them.
   3636       if not _IsTestFilename(filename):
   3637         error(filename, linenum, 'readability/streams', 3,
   3638               'Streams are highly discouraged.')
   3639 
   3640 
   3641 def _GetTextInside(text, start_pattern):
   3642   r"""Retrieves all the text between matching open and close parentheses.
   3643 
   3644   Given a string of lines and a regular expression string, retrieve all the text
   3645   following the expression and between opening punctuation symbols like
   3646   (, [, or {, and the matching close-punctuation symbol. This properly nested
   3647   occurrences of the punctuations, so for the text like
   3648     printf(a(), b(c()));
   3649   a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
   3650   start_pattern must match string having an open punctuation symbol at the end.
   3651 
   3652   Args:
   3653     text: The lines to extract text. Its comments and strings must be elided.
   3654            It can be single line and can span multiple lines.
   3655     start_pattern: The regexp string indicating where to start extracting
   3656                    the text.
   3657   Returns:
   3658     The extracted text.
   3659     None if either the opening string or ending punctuation could not be found.
   3660   """
   3661   # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
   3662   # rewritten to use _GetTextInside (and use inferior regexp matching today).
   3663 
   3664   # Give opening punctuations to get the matching close-punctuations.
   3665   matching_punctuation = {'(': ')', '{': '}', '[': ']'}
   3666   closing_punctuation = set(matching_punctuation.itervalues())
   3667 
   3668   # Find the position to start extracting text.
   3669   match = re.search(start_pattern, text, re.M)
   3670   if not match:  # start_pattern not found in text.
   3671     return None
   3672   start_position = match.end(0)
   3673 
   3674   assert start_position > 0, (
   3675       'start_pattern must ends with an opening punctuation.')
   3676   assert text[start_position - 1] in matching_punctuation, (
   3677       'start_pattern must ends with an opening punctuation.')
   3678   # Stack of closing punctuations we expect to have in text after position.
   3679   punctuation_stack = [matching_punctuation[text[start_position - 1]]]
   3680   position = start_position
   3681   while punctuation_stack and position < len(text):
   3682     if text[position] == punctuation_stack[-1]:
   3683       punctuation_stack.pop()
   3684     elif text[position] in closing_punctuation:
   3685       # A closing punctuation without matching opening punctuations.
   3686       return None
   3687     elif text[position] in matching_punctuation:
   3688       punctuation_stack.append(matching_punctuation[text[position]])
   3689     position += 1
   3690   if punctuation_stack:
   3691     # Opening punctuations left without matching close-punctuations.
   3692     return None
   3693   # punctuations match.
   3694   return text[start_position:position - 1]
   3695 
   3696 
# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |   [^<>] )*
#         >
#     |   [^<>] )*
#   >
#
# An identifier: a letter or underscore followed by any number of word
# characters.
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
# A type name: an optional leading 'const', an optional
# typename/class/struct/union/enum keyword, then one or more of: a word
# character, a template argument list (the nested <...> pattern described
# above), or a '::' scope separator.
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
# The single capturing group holds the type (with any trailing 'const' or
# '*' qualifiers), the '&' and the identifier.  An optional '= default'
# value and the terminating ',' or ')' must follow but are not captured.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
# Unlike _RE_PATTERN_REF_PARAM, this one is left as an uncompiled pattern
# string.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
   3721 
   3722 
   3723 def CheckLanguage(filename, clean_lines, linenum, file_extension,
   3724                   include_state, nesting_state, error):
   3725   """Checks rules from the 'C++ language rules' section of cppguide.html.
   3726 
   3727   Some of these rules are hard to test (function overloading, using
   3728   uint32 inappropriately), but we do the best we can.
   3729 
   3730   Args:
   3731     filename: The name of the current file.
   3732     clean_lines: A CleansedLines instance containing the file.
   3733     linenum: The number of the line to check.
   3734     file_extension: The extension (without the dot) of the filename.
   3735     include_state: An _IncludeState instance in which the headers are inserted.
   3736     nesting_state: A _NestingState instance which maintains information about
   3737                    the current stack of nested blocks being parsed.
   3738     error: The function to call with any errors found.
   3739   """
   3740   # If the line is empty or consists of entirely a comment, no need to
   3741   # check it.
   3742   line = clean_lines.elided[linenum]
   3743   if not line:
   3744     return
   3745 
   3746   match = _RE_PATTERN_INCLUDE.search(line)
   3747   if match:
   3748     CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
   3749     return
   3750 
   3751   # Reset include state across preprocessor directives.  This is meant
   3752   # to silence warnings for conditional includes.
   3753   if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
   3754     include_state.ResetSection()
   3755 
   3756   # Make Windows paths like Unix.
   3757   fullname = os.path.abspath(filename).replace('\\', '/')
   3758 
   3759   # TODO(unknown): figure out if they're using default arguments in fn proto.
   3760 
   3761   # Check to see if they're using an conversion function cast.
   3762   # I just try to capture the most common basic types, though there are more.
   3763   # Parameterless conversion functions, such as bool(), are allowed as they are
   3764   # probably a member operator declaration or default constructor.
   3765   match = Search(
   3766       r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
   3767       r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
   3768       r'(\([^)].*)', line)
   3769   if match:
   3770     matched_new = match.group(1)
   3771     matched_type = match.group(2)
   3772     matched_funcptr = match.group(3)
   3773 
   3774     # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   3775     # where type may be float(), int(string), etc.  Without context they are
   3776     # virtually indistinguishable from int(x) casts. Likewise, gMock's
   3777     # MockCallback takes a template parameter of the form return_type(arg_type),
   3778     # which looks much like the cast we're trying to detect.
   3779     #
   3780     # std::function<> wrapper has a similar problem.
   3781     #
   3782     # Return types for function pointers also look like casts if they
   3783     # don't have an extra space.
   3784     if (matched_new is None and  # If new operator, then this isn't a cast
   3785         not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
   3786              Search(r'\bMockCallback<.*>', line) or
   3787              Search(r'\bstd::function<.*>', line)) and
   3788         not (matched_funcptr and
   3789              Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
   3790                    matched_funcptr))):
   3791       # Try a bit harder to catch gmock lines: the only place where
   3792       # something looks like an old-style cast is where we declare the
   3793       # return type of the mocked method, and the only time when we
   3794       # are missing context is if MOCK_METHOD was split across
   3795       # multiple lines.  The missing MOCK_METHOD is usually one or two
   3796       # lines back, so scan back one or two lines.
   3797       #
   3798       # It's not possible for gmock macros to appear in the first 2
   3799       # lines, since the class head + section name takes up 2 lines.
   3800       if (linenum < 2 or
   3801           not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
   3802                      clean_lines.elided[linenum - 1]) or
   3803                Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
   3804                      clean_lines.elided[linenum - 2]))):
   3805         error(filename, linenum, 'readability/casting', 4,
   3806               'Using deprecated casting style.  '
   3807               'Use static_cast<%s>(...) instead' %
   3808               matched_type)
   3809 
   3810   CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3811                   'static_cast',
   3812                   r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
   3813 
   3814   # This doesn't catch all cases. Consider (const char * const)"hello".
   3815   #
   3816   # (char *) "foo" should always be a const_cast (reinterpret_cast won't
   3817   # compile).
   3818   if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3819                      'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
   3820     pass
   3821   else:
   3822     # Check pointer casts for other than string constants
   3823     CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3824                     'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   3825 
   3826   # In addition, we look for people taking the address of a cast.  This
   3827   # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   3828   # point where you think.
   3829   match = Search(
   3830       r'(?:&\(([^)]+)\)[\w(])|'
   3831       r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
   3832   if match and match.group(1) != '*':
   3833     error(filename, linenum, 'runtime/casting', 4,
   3834           ('Are you taking an address of a cast?  '
   3835            'This is dangerous: could be a temp var.  '
   3836            'Take the address before doing the cast, rather than after'))
   3837 
   3838   # Create an extended_line, which is the concatenation of the current and
   3839   # next lines, for more effective checking of code that may span more than one
   3840   # line.
   3841   if linenum + 1 < clean_lines.NumLines():
   3842     extended_line = line + clean_lines.elided[linenum + 1]
   3843   else:
   3844     extended_line = line
   3845 
   3846   # Check for people declaring static/global STL strings at the top level.
   3847   # This is dangerous because the C++ language does not guarantee that
   3848   # globals with constructors are initialized before the first access.
   3849   match = Match(
   3850       r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   3851       line)
   3852   # Make sure it's not a function.
   3853   # Function template specialization looks like: "string foo<Type>(...".
   3854   # Class template definitions look like: "string Foo<Type>::Method(...".
   3855   #
   3856   # Also ignore things that look like operators.  These are matched separately
   3857   # because operator names cross non-word boundaries.  If we change the pattern
   3858   # above, we would decrease the accuracy of matching identifiers.
   3859   if (match and
   3860       not Search(r'\boperator\W', line) and
   3861       not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
   3862     error(filename, linenum, 'runtime/string', 4,
   3863           'For a static/global string constant, use a C style string instead: '
   3864           '"%schar %s[]".' %
   3865           (match.group(1), match.group(2)))
   3866 
   3867   if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   3868     error(filename, linenum, 'runtime/init', 4,
   3869           'You seem to be initializing a member variable with itself.')
   3870 
   3871   if file_extension == 'h':
   3872     # TODO(unknown): check that 1-arg constructors are explicit.
   3873     #                How to tell it's a constructor?
   3874     #                (handled in CheckForNonStandardConstructs for now)
   3875     # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
   3876     #                (level 1 error)
   3877     pass
   3878 
   3879   # Check if people are using the verboten C basic types.  The only exception
   3880   # we regularly allow is "unsigned short port" for port.
   3881   if Search(r'\bshort port\b', line):
   3882     if not Search(r'\bunsigned short port\b', line):
   3883       error(filename, linenum, 'runtime/int', 4,
   3884             'Use "unsigned short" for ports, not "short"')
   3885   else:
   3886     match = Search(r'\b(short|long(?! +double)|long long)\b', line)
   3887     if match:
   3888       error(filename, linenum, 'runtime/int', 4,
   3889             'Use int16/int64/etc, rather than the C type %s' % match.group(1))
   3890 
   3891   # When snprintf is used, the second argument shouldn't be a literal.
   3892   match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   3893   if match and match.group(2) != '0':
   3894     # If 2nd arg is zero, snprintf is used to calculate size.
   3895     error(filename, linenum, 'runtime/printf', 3,
   3896           'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   3897           'to snprintf.' % (match.group(1), match.group(2)))
   3898 
   3899   # Check if some verboten C functions are being used.
   3900   if Search(r'\bsprintf\b', line):
   3901     error(filename, linenum, 'runtime/printf', 5,
   3902           'Never use sprintf.  Use snprintf instead.')
   3903   match = Search(r'\b(strcpy|strcat)\b', line)
   3904   if match:
   3905     error(filename, linenum, 'runtime/printf', 4,
   3906           'Almost always, snprintf is better than %s' % match.group(1))
   3907 
   3908   # Check if some verboten operator overloading is going on
   3909   # TODO(unknown): catch out-of-line unary operator&:
   3910   #   class X {};
   3911   #   int operator&(const X& x) { return 42; }  // unary operator&
   3912   # The trick is it's hard to tell apart from binary operator&:
   3913   #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
   3914   if Search(r'\boperator\s*&\s*\(\s*\)', line):
   3915     error(filename, linenum, 'runtime/operator', 4,
   3916           'Unary operator& is dangerous.  Do not use it.')
   3917 
   3918   # Check for suspicious usage of "if" like
   3919   # } if (a == b) {
   3920   if Search(r'\}\s*if\s*\(', line):
   3921     error(filename, linenum, 'readability/braces', 4,
   3922           'Did you mean "else if"? If not, start a new line for "if".')
   3923 
   3924   # Check for potential format string bugs like printf(foo).
   3925   # We constrain the pattern not to pick things like DocidForPrintf(foo).
   3926   # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   3927   # TODO(sugawarayu): Catch the following case. Need to change the calling
   3928   # convention of the whole function to process multiple line to handle it.
   3929   #   printf(
   3930   #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
   3931   printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
   3932   if printf_args:
   3933     match = Match(r'([\w.\->()]+)$', printf_args)
   3934     if match and match.group(1) != '__VA_ARGS__':
   3935       function_name = re.search(r'\b((?:string)?printf)\s*\(',
   3936                                 line, re.I).group(1)
   3937       error(filename, linenum, 'runtime/printf', 4,
   3938             'Potential format string bug. Do %s("%%s", %s) instead.'
   3939             % (function_name, match.group(1)))
   3940 
   3941   # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   3942   match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   3943   if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
   3944     error(filename, linenum, 'runtime/memset', 4,
   3945           'Did you mean "memset(%s, 0, %s)"?'
   3946           % (match.group(1), match.group(2)))
   3947 
   3948   if Search(r'\busing namespace\b', line):
   3949     error(filename, linenum, 'build/namespaces', 5,
   3950           'Do not use namespace using-directives.  '
   3951           'Use using-declarations instead.')
   3952 
   3953   # Detect variable-length arrays.
   3954   match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   3955   if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
   3956       match.group(3).find(']') == -1):
   3957     # Split the size using space and arithmetic operators as delimiters.
   3958     # If any of the resulting tokens are not compile time constants then
   3959     # report the error.
   3960     tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
   3961     is_const = True
   3962     skip_next = False
   3963     for tok in tokens:
   3964       if skip_next:
   3965         skip_next = False
   3966         continue
   3967 
   3968       if Search(r'sizeof\(.+\)', tok): continue
   3969       if Search(r'arraysize\(\w+\)', tok): continue
   3970 
   3971       tok = tok.lstrip('(')
   3972       tok = tok.rstrip(')')
   3973       if not tok: continue
   3974       if Match(r'\d+', tok): continue
   3975       if Match(r'0[xX][0-9a-fA-F]+', tok): continue
   3976       if Match(r'k[A-Z0-9]\w*', tok): continue
   3977       if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
   3978       if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
   3979       # A catch all for tricky sizeof cases, including 'sizeof expression',
   3980       # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   3981       # requires skipping the next token because we split on ' ' and '*'.
   3982       if tok.startswith('sizeof'):
   3983         skip_next = True
   3984         continue
   3985       is_const = False
   3986       break
   3987     if not is_const:
   3988       error(filename, linenum, 'runtime/arrays', 1,
   3989             'Do not use variable-length arrays.  Use an appropriately named '
   3990             "('k' followed by CamelCase) compile-time constant for the size.")
   3991 
   3992   # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
   3993   # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
   3994   # in the class declaration.
   3995   match = Match(
   3996       (r'\s*'
   3997        r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
   3998        r'\(.*\);$'),
   3999       line)
   4000   if match and linenum + 1 < clean_lines.NumLines():
   4001     next_line = clean_lines.elided[linenum + 1]
   4002     # We allow some, but not all, declarations of variables to be present
   4003     # in the statement that defines the class.  The [\w\*,\s]* fragment of
   4004     # the regular expression below allows users to declare instances of
   4005     # the class or pointers to instances, but not less common types such
   4006     # as function pointers or arrays.  It's a tradeoff between allowing
   4007     # reasonable code and avoiding trying to parse more C++ using regexps.
   4008     if not Search(r'^\s*}[\w\*,\s]*;', next_line):
   4009       error(filename, linenum, 'readability/constructors', 3,
   4010             match.group(1) + ' should be the last thing in the class')
   4011 
   4012   # Check for use of unnamed namespaces in header files.  Registration
   4013   # macros are typically OK, so we allow use of "namespace {" on lines
   4014   # that end with backslashes.
   4015   if (file_extension == 'h'
   4016       and Search(r'\bnamespace\s*{', line)
   4017       and line[-1] != '\\'):
   4018     error(filename, linenum, 'build/namespaces', 4,
   4019           'Do not use unnamed namespaces in header files.  See '
   4020           'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   4021           ' for more information.')
   4022 
   4023 def CheckForNonConstReference(filename, clean_lines, linenum,
   4024                               nesting_state, error):
   4025   """Check for non-const references.
   4026 
   4027   Separate from CheckLanguage since it scans backwards from current
   4028   line, instead of scanning forward.
   4029 
   4030   Args:
   4031     filename: The name of the current file.
   4032     clean_lines: A CleansedLines instance containing the file.
   4033     linenum: The number of the line to check.
   4034     nesting_state: A _NestingState instance which maintains information about
   4035                    the current stack of nested blocks being parsed.
   4036     error: The function to call with any errors found.
   4037   """
   4038   # Do nothing if there is no '&' on current line.
   4039   line = clean_lines.elided[linenum]
   4040   if '&' not in line:
   4041     return
   4042 
   4043   # Long type names may be broken across multiple lines, usually in one
   4044   # of these forms:
   4045   #   LongType
   4046   #       ::LongTypeContinued &identifier
   4047   #   LongType::
   4048   #       LongTypeContinued &identifier
   4049   #   LongType<
   4050   #       ...>::LongTypeContinued &identifier
   4051   #
   4052   # If we detected a type split across two lines, join the previous
   4053   # line to current line so that we can match const references
   4054   # accordingly.
   4055   #
   4056   # Note that this only scans back one line, since scanning back
   4057   # arbitrary number of lines would be expensive.  If you have a type
   4058   # that spans more than 2 lines, please use a typedef.
   4059   if linenum > 1:
   4060     previous = None
   4061     if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
   4062       # previous_line\n + ::current_line
   4063       previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
   4064                         clean_lines.elided[linenum - 1])
   4065     elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
   4066       # previous_line::\n + current_line
   4067       previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
   4068                         clean_lines.elided[linenum - 1])
   4069     if previous:
   4070       line = previous.group(1) + line.lstrip()
   4071     else:
   4072       # Check for templated parameter that is split across multiple lines
   4073       endpos = line.rfind('>')
   4074       if endpos > -1:
   4075         (_, startline, startpos) = ReverseCloseExpression(
   4076             clean_lines, linenum, endpos)
   4077         if startpos > -1 and startline < linenum:
   4078           # Found the matching < on an earlier line, collect all
   4079           # pieces up to current line.
   4080           line = ''
   4081           for i in xrange(startline, linenum + 1):
   4082             line += clean_lines.elided[i].strip()
   4083 
   4084   # Check for non-const references in function parameters.  A single '&' may
   4085   # found in the following places:
   4086   #   inside expression: binary & for bitwise AND
   4087   #   inside expression: unary & for taking the address of something
   4088   #   inside declarators: reference parameter
   4089   # We will exclude the first two cases by checking that we are not inside a
   4090   # function body, including one that was just introduced by a trailing '{'.
   4091   # TODO(unknwon): Doesn't account for preprocessor directives.
   4092   # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
   4093   check_params = False
   4094   if not nesting_state.stack:
   4095     check_params = True  # top level
   4096   elif (isinstance(nesting_state.stack[-1], _ClassInfo) or
   4097         isinstance(nesting_state.stack[-1], _NamespaceInfo)):
   4098     check_params = True  # within class or namespace
   4099   elif Match(r'.*{\s*$', line):
   4100     if (len(nesting_state.stack) == 1 or
   4101         isinstance(nesting_state.stack[-2], _ClassInfo) or
   4102         isinstance(nesting_state.stack[-2], _NamespaceInfo)):
   4103       check_params = True  # just opened global/class/namespace block
   4104   # We allow non-const references in a few standard places, like functions
   4105   # called "swap()" or iostream operators like "<<" or ">>".  Do not check
   4106   # those function parameters.
   4107   #
   4108   # We also accept & in static_assert, which looks like a function but
   4109   # it's actually a declaration expression.
   4110   whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
   4111                            r'operator\s*[<>][<>]|'
   4112                            r'static_assert|COMPILE_ASSERT'
   4113                            r')\s*\(')
   4114   if Search(whitelisted_functions, line):
   4115     check_params = False
   4116   elif not Search(r'\S+\([^)]*$', line):
   4117     # Don't see a whitelisted function on this line.  Actually we
   4118     # didn't see any function name on this line, so this is likely a
   4119     # multi-line parameter list.  Try a bit harder to catch this case.
   4120     for i in xrange(2):
   4121       if (linenum > i and
   4122           Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
   4123         check_params = False
   4124         break
   4125 
   4126   if check_params:
   4127     decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
   4128     for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
   4129       if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
   4130         error(filename, linenum, 'runtime/references', 2,
   4131               'Is this a non-const reference? '
   4132               'If so, make const or use a pointer: ' +
   4133               ReplaceAll(' *<', '<', parameter))
   4134 
   4135 
   4136 def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
   4137                     error):
   4138   """Checks for a C-style cast by looking for the pattern.
   4139 
   4140   Args:
   4141     filename: The name of the current file.
   4142     linenum: The number of the line to check.
   4143     line: The line of code to check.
   4144     raw_line: The raw line of code to check, with comments.
   4145     cast_type: The string for the C++ cast to recommend.  This is either
   4146       reinterpret_cast, static_cast, or const_cast, depending.
   4147     pattern: The regular expression used to find C-style casts.
   4148     error: The function to call with any errors found.
   4149 
   4150   Returns:
   4151     True if an error was emitted.
   4152     False otherwise.
   4153   """
   4154   match = Search(pattern, line)
   4155   if not match:
   4156     return False
   4157 
   4158   # e.g., sizeof(int)
   4159   sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1])
   4160   if sizeof_match:
   4161     error(filename, linenum, 'runtime/sizeof', 1,
   4162           'Using sizeof(type).  Use sizeof(varname) instead if possible')
   4163     return True
   4164 
   4165   # operator++(int) and operator--(int)
   4166   if (line[0:match.start(1) - 1].endswith(' operator++') or
   4167       line[0:match.start(1) - 1].endswith(' operator--')):
   4168     return False
   4169 
   4170   # A single unnamed argument for a function tends to look like old
   4171   # style cast.  If we see those, don't issue warnings for deprecated
   4172   # casts, instead issue warnings for unnamed arguments where
   4173   # appropriate.
   4174   #
   4175   # These are things that we want warnings for, since the style guide
   4176   # explicitly require all parameters to be named:
   4177   #   Function(int);
   4178   #   Function(int) {
   4179   #   ConstMember(int) const;
   4180   #   ConstMember(int) const {
   4181   #   ExceptionMember(int) throw (...);
   4182   #   ExceptionMember(int) throw (...) {
   4183   #   PureVirtual(int) = 0;
   4184   #
   4185   # These are functions of some sort, where the compiler would be fine
   4186   # if they had named parameters, but people often omit those
   4187   # identifiers to reduce clutter:
   4188   #   (FunctionPointer)(int);
   4189   #   (FunctionPointer)(int) = value;
   4190   #   Function((function_pointer_arg)(int))
   4191   #   <TemplateArgument(int)>;
   4192   #   <(FunctionPointerTemplateArgument)(int)>;
   4193   remainder = line[match.end(0):]
   4194   if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
   4195     # Looks like an unnamed parameter.
   4196 
   4197     # Don't warn on any kind of template arguments.
   4198     if Match(r'^\s*>', remainder):
   4199       return False
   4200 
   4201     # Don't warn on assignments to function pointers, but keep warnings for
   4202     # unnamed parameters to pure virtual functions.  Note that this pattern
   4203     # will also pass on assignments of "0" to function pointers, but the
   4204     # preferred values for those would be "nullptr" or "NULL".
   4205     matched_zero = Match(r'^\s=\s*(\S+)\s*;', remainder)
   4206     if matched_zero and matched_zero.group(1) != '0':
   4207       return False
   4208 
   4209     # Don't warn on function pointer declarations.  For this we need
   4210     # to check what came before the "(type)" string.
   4211     if Match(r'.*\)\s*$', line[0:match.start(0)]):
   4212       return False
   4213 
   4214     # Don't warn if the parameter is named with block comments, e.g.:
   4215     #  Function(int /*unused_param*/);
   4216     if '/*' in raw_line:
   4217       return False
   4218 
   4219     # Passed all filters, issue warning here.
   4220     error(filename, linenum, 'readability/function', 3,
   4221           'All parameters should be named in a function')
   4222     return True
   4223 
   4224   # At this point, all that should be left is actual casts.
   4225   error(filename, linenum, 'readability/casting', 4,
   4226         'Using C-style cast.  Use %s<%s>(...) instead' %
   4227         (cast_type, match.group(1)))
   4228 
   4229   return True
   4230 
   4231 
   4232 _HEADERS_CONTAINING_TEMPLATES = (
   4233     ('<deque>', ('deque',)),
   4234     ('<functional>', ('unary_function', 'binary_function',
   4235                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   4236                       'negate',
   4237                       'equal_to', 'not_equal_to', 'greater', 'less',
   4238                       'greater_equal', 'less_equal',
   4239                       'logical_and', 'logical_or', 'logical_not',
   4240                       'unary_negate', 'not1', 'binary_negate', 'not2',
   4241                       'bind1st', 'bind2nd',
   4242                       'pointer_to_unary_function',
   4243                       'pointer_to_binary_function',
   4244                       'ptr_fun',
   4245                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   4246                       'mem_fun_ref_t',
   4247                       'const_mem_fun_t', 'const_mem_fun1_t',
   4248                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   4249                       'mem_fun_ref',
   4250                      )),
   4251     ('<limits>', ('numeric_limits',)),
   4252     ('<list>', ('list',)),
   4253     ('<map>', ('map', 'multimap',)),
   4254     ('<memory>', ('allocator',)),
   4255     ('<queue>', ('queue', 'priority_queue',)),
   4256     ('<set>', ('set', 'multiset',)),
   4257     ('<stack>', ('stack',)),
   4258     ('<string>', ('char_traits', 'basic_string',)),
   4259     ('<utility>', ('pair',)),
   4260     ('<vector>', ('vector',)),
   4261 
   4262     # gcc extensions.
   4263     # Note: std::hash is their hash, ::hash is our hash
   4264     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   4265     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   4266     ('<slist>', ('slist',)),
   4267     )
   4268 
   4269 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   4270 
   4271 _re_pattern_algorithm_header = []
   4272 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   4273                   'transform'):
   4274   # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   4275   # type::max().
   4276   _re_pattern_algorithm_header.append(
   4277       (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   4278        _template,
   4279        '<algorithm>'))
   4280 
   4281 _re_pattern_templates = []
   4282 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   4283   for _template in _templates:
   4284     _re_pattern_templates.append(
   4285         (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   4286          _template + '<>',
   4287          _header))
   4288 
   4289 
   4290 def FilesBelongToSameModule(filename_cc, filename_h):
   4291   """Check if these two filenames belong to the same module.
   4292 
   4293   The concept of a 'module' here is a as follows:
   4294   foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
   4295   same 'module' if they are in the same directory.
   4296   some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   4297   to belong to the same module here.
   4298 
   4299   If the filename_cc contains a longer path than the filename_h, for example,
   4300   '/absolute/path/to/base/sysinfo.cc', and this file would include
   4301   'base/sysinfo.h', this function also produces the prefix needed to open the
   4302   header. This is used by the caller of this function to more robustly open the
   4303   header file. We don't have access to the real include paths in this context,
   4304   so we need this guesswork here.
   4305 
   4306   Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
   4307   according to this implementation. Because of this, this function gives
   4308   some false positives. This should be sufficiently rare in practice.
   4309 
   4310   Args:
   4311     filename_cc: is the path for the .cc file
   4312     filename_h: is the path for the header path
   4313 
   4314   Returns:
   4315     Tuple with a bool and a string:
   4316     bool: True if filename_cc and filename_h belong to the same module.
   4317     string: the additional prefix needed to open the header file.
   4318   """
   4319 
   4320   if not filename_cc.endswith('.cc'):
   4321     return (False, '')
   4322   filename_cc = filename_cc[:-len('.cc')]
   4323   if filename_cc.endswith('_unittest'):
   4324     filename_cc = filename_cc[:-len('_unittest')]
   4325   elif filename_cc.endswith('_test'):
   4326     filename_cc = filename_cc[:-len('_test')]
   4327   filename_cc = filename_cc.replace('/public/', '/')
   4328   filename_cc = filename_cc.replace('/internal/', '/')
   4329 
   4330   if not filename_h.endswith('.h'):
   4331     return (False, '')
   4332   filename_h = filename_h[:-len('.h')]
   4333   if filename_h.endswith('-inl'):
   4334     filename_h = filename_h[:-len('-inl')]
   4335   filename_h = filename_h.replace('/public/', '/')
   4336   filename_h = filename_h.replace('/internal/', '/')
   4337 
   4338   files_belong_to_same_module = filename_cc.endswith(filename_h)
   4339   common_path = ''
   4340   if files_belong_to_same_module:
   4341     common_path = filename_cc[:-len(filename_h)]
   4342   return files_belong_to_same_module, common_path
   4343 
   4344 
   4345 def UpdateIncludeState(filename, include_state, io=codecs):
   4346   """Fill up the include_state with new includes found from the file.
   4347 
   4348   Args:
   4349     filename: the name of the header to read.
   4350     include_state: an _IncludeState instance in which the headers are inserted.
   4351     io: The io factory to use to read the file. Provided for testability.
   4352 
   4353   Returns:
   4354     True if a header was succesfully added. False otherwise.
   4355   """
   4356   headerfile = None
   4357   try:
   4358     headerfile = io.open(filename, 'r', 'utf8', 'replace')
   4359   except IOError:
   4360     return False
   4361   linenum = 0
   4362   for line in headerfile:
   4363     linenum += 1
   4364     clean_line = CleanseComments(line)
   4365     match = _RE_PATTERN_INCLUDE.search(clean_line)
   4366     if match:
   4367       include = match.group(2)
   4368       # The value formatting is cute, but not really used right now.
   4369       # What matters here is that the key is in include_state.
   4370       include_state.setdefault(include, '%s:%d' % (filename, linenum))
   4371   return True
   4372 
   4373 
   4374 def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
   4375                               io=codecs):
   4376   """Reports for missing stl includes.
   4377 
   4378   This function will output warnings to make sure you are including the headers
   4379   necessary for the stl containers and functions that you use. We only give one
   4380   reason to include a header. For example, if you use both equal_to<> and
   4381   less<> in a .h file, only one (the latter in the file) of these will be
   4382   reported as a reason to include the <functional>.
   4383 
   4384   Args:
   4385     filename: The name of the current file.
   4386     clean_lines: A CleansedLines instance containing the file.
   4387     include_state: An _IncludeState instance.
   4388     error: The function to call with any errors found.
   4389     io: The IO factory to use to read the header file. Provided for unittest
   4390         injection.
   4391   """
   4392   required = {}  # A map of header name to linenumber and the template entity.
   4393                  # Example of required: { '<functional>': (1219, 'less<>') }
   4394 
   4395   for linenum in xrange(clean_lines.NumLines()):
   4396     line = clean_lines.elided[linenum]
   4397     if not line or line[0] == '#':
   4398       continue
   4399 
   4400     # String is special -- it is a non-templatized type in STL.
   4401     matched = _RE_PATTERN_STRING.search(line)
   4402     if matched:
   4403       # Don't warn about strings in non-STL namespaces:
   4404       # (We check only the first match per line; good enough.)
   4405       prefix = line[:matched.start()]
   4406       if prefix.endswith('std::') or not prefix.endswith('::'):
   4407         required['<string>'] = (linenum, 'string')
   4408 
   4409     for pattern, template, header in _re_pattern_algorithm_header:
   4410       if pattern.search(line):
   4411         required[header] = (linenum, template)
   4412 
   4413     # The following function is just a speed up, no semantics are changed.
   4414     if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   4415       continue
   4416 
   4417     for pattern, template, header in _re_pattern_templates:
   4418       if pattern.search(line):
   4419         required[header] = (linenum, template)
   4420 
   4421   # The policy is that if you #include something in foo.h you don't need to
   4422   # include it again in foo.cc. Here, we will look at possible includes.
   4423   # Let's copy the include_state so it is only messed up within this function.
   4424   include_state = include_state.copy()
   4425 
   4426   # Did we find the header for this file (if any) and succesfully load it?
   4427   header_found = False
   4428 
   4429   # Use the absolute path so that matching works properly.
   4430   abs_filename = FileInfo(filename).FullName()
   4431 
   4432   # For Emacs's flymake.
   4433   # If cpplint is invoked from Emacs's flymake, a temporary file is generated
   4434   # by flymake and that file name might end with '_flymake.cc'. In that case,
   4435   # restore original file name here so that the corresponding header file can be
   4436   # found.
   4437   # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
   4438   # instead of 'foo_flymake.h'
   4439   abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
   4440 
   4441   # include_state is modified during iteration, so we iterate over a copy of
   4442   # the keys.
   4443   header_keys = include_state.keys()
   4444   for header in header_keys:
   4445     (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
   4446     fullpath = common_path + header
   4447     if same_module and UpdateIncludeState(fullpath, include_state, io):
   4448       header_found = True
   4449 
   4450   # If we can't find the header file for a .cc, assume it's because we don't
   4451   # know where to look. In that case we'll give up as we're not sure they
   4452   # didn't include it in the .h file.
   4453   # TODO(unknown): Do a better job of finding .h files so we are confident that
   4454   # not having the .h file means there isn't one.
   4455   if filename.endswith('.cc') and not header_found:
   4456     return
   4457 
   4458   # All the lines have been processed, report the errors found.
   4459   for required_header_unstripped in required:
   4460     template = required[required_header_unstripped][1]
   4461     if required_header_unstripped.strip('<>"') not in include_state:
   4462       error(filename, required[required_header_unstripped][0],
   4463             'build/include_what_you_use', 4,
   4464             'Add #include ' + required_header_unstripped + ' for ' + template)
   4465 
   4466 
   4467 _RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
   4468 
   4469 
   4470 def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
   4471   """Check that make_pair's template arguments are deduced.
   4472 
   4473   G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
   4474   specified explicitly, and such use isn't intended in any case.
   4475 
   4476   Args:
   4477     filename: The name of the current file.
   4478     clean_lines: A CleansedLines instance containing the file.
   4479     linenum: The number of the line to check.
   4480     error: The function to call with any errors found.
   4481   """
   4482   line = clean_lines.elided[linenum]
   4483   match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
   4484   if match:
   4485     error(filename, linenum, 'build/explicit_make_pair',
   4486           4,  # 4 = high confidence
   4487           'For C++11-compatibility, omit template arguments from make_pair'
   4488           ' OR use pair directly OR if appropriate, construct a pair directly')
   4489 
   4490 
   4491 def ProcessLine(filename, file_extension, clean_lines, line,
   4492                 include_state, function_state, nesting_state, error,
   4493                 extra_check_functions=[]):
   4494   """Processes a single line in the file.
   4495 
   4496   Args:
   4497     filename: Filename of the file that is being processed.
   4498     file_extension: The extension (dot not included) of the file.
   4499     clean_lines: An array of strings, each representing a line of the file,
   4500                  with comments stripped.
   4501     line: Number of line being processed.
   4502     include_state: An _IncludeState instance in which the headers are inserted.
   4503     function_state: A _FunctionState instance which counts function lines, etc.
   4504     nesting_state: A _NestingState instance which maintains information about
   4505                    the current stack of nested blocks being parsed.
   4506     error: A callable to which errors are reported, which takes 4 arguments:
   4507            filename, line number, error level, and message
   4508     extra_check_functions: An array of additional check functions that will be
   4509                            run on each source line. Each function takes 4
   4510                            arguments: filename, clean_lines, line, error
   4511   """
   4512   raw_lines = clean_lines.raw_lines
   4513   ParseNolintSuppressions(filename, raw_lines[line], line, error)
   4514   nesting_state.Update(filename, clean_lines, line, error)
   4515   if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
   4516     return
   4517   CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
   4518   CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
   4519   CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
   4520   CheckLanguage(filename, clean_lines, line, file_extension, include_state,
   4521                 nesting_state, error)
   4522   CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
   4523   CheckForNonStandardConstructs(filename, clean_lines, line,
   4524                                 nesting_state, error)
   4525   CheckVlogArguments(filename, clean_lines, line, error)
   4526   CheckPosixThreading(filename, clean_lines, line, error)
   4527   CheckInvalidIncrement(filename, clean_lines, line, error)
   4528   CheckMakePairUsesDeduction(filename, clean_lines, line, error)
   4529   for check_fn in extra_check_functions:
   4530     check_fn(filename, clean_lines, line, error)
   4531 
   4532 def ProcessFileData(filename, file_extension, lines, error,
   4533                     extra_check_functions=[]):
   4534   """Performs lint checks and reports any errors to the given error function.
   4535 
   4536   Args:
   4537     filename: Filename of the file that is being processed.
   4538     file_extension: The extension (dot not included) of the file.
   4539     lines: An array of strings, each representing a line of the file, with the
   4540            last element being empty if the file is terminated with a newline.
   4541     error: A callable to which errors are reported, which takes 4 arguments:
   4542            filename, line number, error level, and message
   4543     extra_check_functions: An array of additional check functions that will be
   4544                            run on each source line. Each function takes 4
   4545                            arguments: filename, clean_lines, line, error
   4546   """
   4547   lines = (['// marker so line numbers and indices both start at 1'] + lines +
   4548            ['// marker so line numbers end in a known way'])
   4549 
   4550   include_state = _IncludeState()
   4551   function_state = _FunctionState()
   4552   nesting_state = _NestingState()
   4553 
   4554   ResetNolintSuppressions()
   4555 
   4556   CheckForCopyright(filename, lines, error)
   4557 
   4558   if file_extension == 'h':
   4559     CheckForHeaderGuard(filename, lines, error)
   4560 
   4561   RemoveMultiLineComments(filename, lines, error)
   4562   clean_lines = CleansedLines(lines)
   4563   for line in xrange(clean_lines.NumLines()):
   4564     ProcessLine(filename, file_extension, clean_lines, line,
   4565                 include_state, function_state, nesting_state, error,
   4566                 extra_check_functions)
   4567   nesting_state.CheckCompletedBlocks(filename, error)
   4568 
   4569   CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
   4570 
   4571   # We check here rather than inside ProcessLine so that we see raw
   4572   # lines rather than "cleaned" lines.
   4573   CheckForBadCharacters(filename, lines, error)
   4574 
   4575   CheckForNewlineAtEOF(filename, lines, error)
   4576 
   4577 def ProcessFile(filename, vlevel, extra_check_functions=[]):
   4578   """Does google-lint on a single file.
   4579 
   4580   Args:
   4581     filename: The name of the file to parse.
   4582 
   4583     vlevel: The level of errors to report.  Every error of confidence
   4584     >= verbose_level will be reported.  0 is a good default.
   4585 
   4586     extra_check_functions: An array of additional check functions that will be
   4587                            run on each source line. Each function takes 4
   4588                            arguments: filename, clean_lines, line, error
   4589   """
   4590 
   4591   _SetVerboseLevel(vlevel)
   4592 
   4593   try:
   4594     # Support the UNIX convention of using "-" for stdin.  Note that
   4595     # we are not opening the file with universal newline support
   4596     # (which codecs doesn't support anyway), so the resulting lines do
   4597     # contain trailing '\r' characters if we are reading a file that
   4598     # has CRLF endings.
   4599     # If after the split a trailing '\r' is present, it is removed
   4600     # below. If it is not expected to be present (i.e. os.linesep !=
   4601     # '\r\n' as in Windows), a warning is issued below if this file
   4602     # is processed.
   4603 
   4604     if filename == '-':
   4605       lines = codecs.StreamReaderWriter(sys.stdin,
   4606                                         codecs.getreader('utf8'),
   4607                                         codecs.getwriter('utf8'),
   4608                                         'replace').read().split('\n')
   4609     else:
   4610       lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
   4611 
   4612     carriage_return_found = False
   4613     # Remove trailing '\r'.
   4614     for linenum in range(len(lines)):
   4615       if lines[linenum].endswith('\r'):
   4616         lines[linenum] = lines[linenum].rstrip('\r')
   4617         carriage_return_found = True
   4618 
   4619   except IOError:
   4620     sys.stderr.write(
   4621         "Skipping input '%s': Can't open for reading\n" % filename)
   4622     return
   4623 
   4624   # Note, if no dot is found, this will give the entire filename as the ext.
   4625   file_extension = filename[filename.rfind('.') + 1:]
   4626 
   4627   # When reading from stdin, the extension is unknown, so no cpplint tests
   4628   # should rely on the extension.
   4629   if filename != '-' and file_extension not in _valid_extensions:
   4630     sys.stderr.write('Ignoring %s; not a valid file name '
   4631                      '(%s)\n' % (filename, ', '.join(_valid_extensions)))
   4632   else:
   4633     ProcessFileData(filename, file_extension, lines, Error,
   4634                     extra_check_functions)
   4635     if carriage_return_found and os.linesep != '\r\n':
   4636       # Use 0 for linenum since outputting only one error for potentially
   4637       # several lines.
   4638       Error(filename, 0, 'whitespace/newline', 1,
   4639             'One or more unexpected \\r (^M) found;'
   4640             'better to use only a \\n')
   4641 
   4642   sys.stderr.write('Done processing %s\n' % filename)
   4643 
   4644 
   4645 def PrintUsage(message):
   4646   """Prints a brief usage string and exits, optionally with an error message.
   4647 
   4648   Args:
   4649     message: The optional error message.
   4650   """
   4651   sys.stderr.write(_USAGE)
   4652   if message:
   4653     sys.exit('\nFATAL ERROR: ' + message)
   4654   else:
   4655     sys.exit(1)
   4656 
   4657 
   4658 def PrintCategories():
   4659   """Prints a list of all the error-categories used by error messages.
   4660 
   4661   These are the categories used to filter messages via --filter.
   4662   """
   4663   sys.stderr.write(''.join('  %s\n' % cat for cat in _ERROR_CATEGORIES))
   4664   sys.exit(0)
   4665 
   4666 
   4667 def ParseArguments(args):
   4668   """Parses the command line arguments.
   4669 
   4670   This may set the output format and verbosity level as side-effects.
   4671 
   4672   Args:
   4673     args: The command line arguments:
   4674 
   4675   Returns:
   4676     The list of filenames to lint.
   4677   """
   4678   try:
   4679     (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
   4680                                                  'counting=',
   4681                                                  'filter=',
   4682                                                  'root=',
   4683                                                  'linelength=',
   4684                                                  'extensions='])
   4685   except getopt.GetoptError:
   4686     PrintUsage('Invalid arguments.')
   4687 
   4688   verbosity = _VerboseLevel()
   4689   output_format = _OutputFormat()
   4690   filters = ''
   4691   counting_style = ''
   4692 
   4693   for (opt, val) in opts:
   4694     if opt == '--help':
   4695       PrintUsage(None)
   4696     elif opt == '--output':
   4697       if val not in ('emacs', 'vs7', 'eclipse'):
   4698         PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
   4699       output_format = val
   4700     elif opt == '--verbose':
   4701       verbosity = int(val)
   4702     elif opt == '--filter':
   4703       filters = val
   4704       if not filters:
   4705         PrintCategories()
   4706     elif opt == '--counting':
   4707       if val not in ('total', 'toplevel', 'detailed'):
   4708         PrintUsage('Valid counting options are total, toplevel, and detailed')
   4709       counting_style = val
   4710     elif opt == '--root':
   4711       global _root
   4712       _root = val
   4713     elif opt == '--linelength':
   4714       global _line_length
   4715       try:
   4716           _line_length = int(val)
   4717       except ValueError:
   4718           PrintUsage('Line length must be digits.')
   4719     elif opt == '--extensions':
   4720       global _valid_extensions
   4721       try:
   4722           _valid_extensions = set(val.split(','))
   4723       except ValueError:
   4724           PrintUsage('Extensions must be comma seperated list.')
   4725 
   4726   if not filenames:
   4727     PrintUsage('No files were specified.')
   4728 
   4729   _SetOutputFormat(output_format)
   4730   _SetVerboseLevel(verbosity)
   4731   _SetFilters(filters)
   4732   _SetCountingStyle(counting_style)
   4733 
   4734   return filenames
   4735 
   4736 
   4737 def main():
   4738   filenames = ParseArguments(sys.argv[1:])
   4739 
   4740   # Change stderr to write with replacement characters so we don't die
   4741   # if we try to print something containing non-ASCII characters.
   4742   sys.stderr = codecs.StreamReaderWriter(sys.stderr,
   4743                                          codecs.getreader('utf8'),
   4744                                          codecs.getwriter('utf8'),
   4745                                          'replace')
   4746 
   4747   _cpplint_state.ResetErrorCounts()
   4748   for filename in filenames:
   4749     ProcessFile(filename, _cpplint_state.verbose_level)
   4750   _cpplint_state.PrintErrorCounts()
   4751 
   4752   sys.exit(_cpplint_state.error_count > 0)
   4753 
   4754 
   4755 if __name__ == '__main__':
   4756   main()
   4757