      1 #!/usr/bin/env python
      2 #
      3 # Copyright (c) 2009 Google Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #    * Redistributions of source code must retain the above copyright
     10 # notice, this list of conditions and the following disclaimer.
     11 #    * Redistributions in binary form must reproduce the above
     12 # copyright notice, this list of conditions and the following disclaimer
     13 # in the documentation and/or other materials provided with the
     14 # distribution.
     15 #    * Neither the name of Google Inc. nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 """Does google-lint on c++ files.
     32 
     33 The goal of this script is to identify places in the code that *may*
     34 be in non-compliance with google style.  It does not attempt to fix
     35 up these problems -- the point is to educate.  It also does not
     36 attempt to find all problems, or to ensure that everything it does
     37 find is legitimately a problem.
     38 
     39 In particular, we can get very confused by /* and // inside strings!
     40 We do a small hack, which is to ignore //'s with "'s after them on the
     41 same line, but it is far from perfect (in either direction).
     42 """
     43 
     44 import codecs
     45 import copy
     46 import getopt
     47 import math  # for log
     48 import os
     49 import re
     50 import sre_compile
     51 import string
     52 import sys
     53 import unicodedata
     54 
     55 
     56 _USAGE = """
     57 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
     58                    [--counting=total|toplevel|detailed] [--root=subdir]
     59                    [--linelength=digits]
     60         <file> [file] ...
     61 
     62   The style guidelines this tries to follow are those in
     63     https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
     64 
     65   Every problem is given a confidence score from 1-5, with 5 meaning we are
     66   certain of the problem, and 1 meaning it could be a legitimate construct.
     67   This will miss some errors, and is not a substitute for a code review.
     68 
     69   To suppress false-positive errors of a certain category, add a
     70   'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
     71   suppresses errors of all categories on that line.
     72 
     73   The files passed in will be linted; at least one file must be provided.
     74   Default linted extensions are .cc, .cpp, .cu, .cuh and .h.  Change the
     75   extensions with the --extensions flag.
     76 
     77   Flags:
     78 
     79     output=vs7
     80       By default, the output is formatted to ease emacs parsing.  Visual Studio
     81       compatible output (vs7) may also be used.  Other formats are unsupported.
     82 
     83     verbose=#
     84       Specify a number 0-5 to restrict errors to certain verbosity levels.
     85 
     86     filter=-x,+y,...
     87       Specify a comma-separated list of category-filters to apply: only
     88       error messages whose category names pass the filters will be printed.
     89       (Category names are printed with the message and look like
     90       "[whitespace/indent]".)  Filters are evaluated left to right.
     91       "-FOO" and "FOO" mean "do not print categories that start with FOO".
     92       "+FOO" means "do print categories that start with FOO".
     93 
     94       Examples: --filter=-whitespace,+whitespace/braces
     95                 --filter=whitespace,runtime/printf,+runtime/printf_format
     96                 --filter=-,+build/include_what_you_use
     97 
     98       To see a list of all the categories used in cpplint, pass no arg:
     99          --filter=
    100 
    101     counting=total|toplevel|detailed
    102       The total number of errors found is always printed. If
    103       'toplevel' is provided, then the count of errors in each of
    104       the top-level categories like 'build' and 'whitespace' will
    105       also be printed. If 'detailed' is provided, then a count
    106       is provided for each category like 'build/class'.
    107 
    108     root=subdir
    109       The root directory used for deriving the header guard CPP variable.
    110       By default, the header guard CPP variable is calculated as the relative
    111       path to the directory that contains .git, .hg, or .svn.  When this flag
    112       is specified, the relative path is calculated from the specified
    113       directory. If the specified directory does not exist, this flag is
    114       ignored.
    115 
    116       Examples:
    117         Assuming that src/.git exists, the header guard CPP variables for
    118         src/chrome/browser/ui/browser.h are:
    119 
    120         No flag => CHROME_BROWSER_UI_BROWSER_H_
    121         --root=chrome => BROWSER_UI_BROWSER_H_
    122         --root=chrome/browser => UI_BROWSER_H_
    123 
    124     linelength=digits
    125       This is the allowed line length for the project. The default value is
    126       80 characters.
    127 
    128       Examples:
    129         --linelength=120
    130 
    131     extensions=extension,extension,...
    132       The allowed file extensions that cpplint will check
    133 
    134       Examples:
    135         --extensions=hpp,cpp
    136 
    137     cpplint.py supports per-directory configurations specified in CPPLINT.cfg
    138     files. A CPPLINT.cfg file can contain a number of key=value pairs.
    139     Currently the following options are supported:
    140 
    141       set noparent
    142       filter=+filter1,-filter2,...
    143       exclude_files=regex
    144       linelength=80
    145       root=subdir
    146 
    147     The "set noparent" option prevents cpplint from traversing the directory
    148     tree upwards looking for more .cfg files in parent directories. This
    149     option is usually placed in the top-level project directory.
    150 
    151     The "filter" option is similar in function to the --filter flag. It
    152     specifies message filters in addition to the |_DEFAULT_FILTERS| and those
    153     specified through the --filter command-line flag.
    154 
    155     "exclude_files" allows you to specify a regular expression to be matched
    156     against a file name. If the expression matches, the file is skipped and
    157     not run through the linter.
    158 
    159     "linelength" allows you to specify the allowed line length for the project.
    160 
    161     The "root" option is similar in function to the --root flag (see example
    162     above).
    163 
    164     CPPLINT.cfg has an effect on files in the same directory and all
    165     sub-directories, unless overridden by a nested configuration file.
    166 
    167       Example file:
    168         filter=-build/include_order,+build/include_alpha
    169         exclude_files=.*\.cc
    170 
    171     The above example disables the build/include_order warning, enables
    172     build/include_alpha, and excludes all .cc files from being processed by
    173     the linter, in the current directory (where the .cfg file is located)
    174     and all sub-directories.
    175 """
    176 
    177 # We categorize each error message we print.  Here are the categories.
    178 # We want an explicit list so we can list them all in cpplint --filter=.
    179 # If you add a new error message with a new category, add it to the list
    180 # here!  cpplint_unittest.py should tell you if you forget to do this.
    181 _ERROR_CATEGORIES = [
    182     'build/class',
    183     'build/c++11',
    184     'build/c++14',
    185     'build/c++tr1',
    186     'build/deprecated',
    187     'build/endif_comment',
    188     'build/explicit_make_pair',
    189     'build/forward_decl',
    190     'build/header_guard',
    191     'build/include',
    192     'build/include_alpha',
    193     'build/include_order',
    194     'build/include_what_you_use',
    195     'build/namespaces',
    196     'build/printf_format',
    197     'build/storage_class',
    198     'legal/copyright',
    199     'readability/alt_tokens',
    200     'readability/braces',
    201     'readability/casting',
    202     'readability/check',
    203     'readability/constructors',
    204     'readability/fn_size',
    205     'readability/inheritance',
    206     'readability/multiline_comment',
    207     'readability/multiline_string',
    208     'readability/namespace',
    209     'readability/nolint',
    210     'readability/nul',
    211     'readability/strings',
    212     'readability/todo',
    213     'readability/utf8',
    214     'runtime/arrays',
    215     'runtime/casting',
    216     'runtime/explicit',
    217     'runtime/int',
    218     'runtime/init',
    219     'runtime/invalid_increment',
    220     'runtime/member_string_references',
    221     'runtime/memset',
    222     'runtime/indentation_namespace',
    223     'runtime/operator',
    224     'runtime/printf',
    225     'runtime/printf_format',
    226     'runtime/references',
    227     'runtime/string',
    228     'runtime/threadsafe_fn',
    229     'runtime/vlog',
    230     'whitespace/blank_line',
    231     'whitespace/braces',
    232     'whitespace/comma',
    233     'whitespace/comments',
    234     'whitespace/empty_conditional_body',
    235     'whitespace/empty_if_body',
    236     'whitespace/empty_loop_body',
    237     'whitespace/end_of_line',
    238     'whitespace/ending_newline',
    239     'whitespace/forcolon',
    240     'whitespace/indent',
    241     'whitespace/line_length',
    242     'whitespace/newline',
    243     'whitespace/operators',
    244     'whitespace/parens',
    245     'whitespace/semicolon',
    246     'whitespace/tab',
    247     'whitespace/todo',
    248     ]
    249 
    250 # These error categories are no longer enforced by cpplint, but for backwards-
    251 # compatibility they may still appear in NOLINT comments.
    252 _LEGACY_ERROR_CATEGORIES = [
    253     'readability/streams',
    254     'readability/function',
    255     ]
    256 
    257 # The default state of the category filter. This is overridden by the --filter=
    258 # flag. By default all errors are on, so only add here categories that should be
    259 # off by default (i.e., categories that must be enabled by the --filter= flags).
    260 # All entries here should start with a '-' or '+', as in the --filter= flag.
    261 _DEFAULT_FILTERS = ['-build/include_alpha']
    262 
    263 # The default list of categories suppressed for C (not C++) files.
    264 _DEFAULT_C_SUPPRESSED_CATEGORIES = [
    265     'readability/casting',
    266     ]
    267 
    268 # The default list of categories suppressed for Linux Kernel files.
    269 _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [
    270     'whitespace/tab',
    271     ]
    272 
    273 # We used to check for high-bit characters, but after much discussion we
    274 # decided those were OK, as long as they were in UTF-8 and didn't represent
    275 # hard-coded international strings, which belong in a separate i18n file.
    276 
    277 # C++ headers
    278 _CPP_HEADERS = frozenset([
    279     # Legacy
    280     'algobase.h',
    281     'algo.h',
    282     'alloc.h',
    283     'builtinbuf.h',
    284     'bvector.h',
    285     'complex.h',
    286     'defalloc.h',
    287     'deque.h',
    288     'editbuf.h',
    289     'fstream.h',
    290     'function.h',
    291     'hash_map',
    292     'hash_map.h',
    293     'hash_set',
    294     'hash_set.h',
    295     'hashtable.h',
    296     'heap.h',
    297     'indstream.h',
    298     'iomanip.h',
    299     'iostream.h',
    300     'istream.h',
    301     'iterator.h',
    302     'list.h',
    303     'map.h',
    304     'multimap.h',
    305     'multiset.h',
    306     'ostream.h',
    307     'pair.h',
    308     'parsestream.h',
    309     'pfstream.h',
    310     'procbuf.h',
    311     'pthread_alloc',
    312     'pthread_alloc.h',
    313     'rope',
    314     'rope.h',
    315     'ropeimpl.h',
    316     'set.h',
    317     'slist',
    318     'slist.h',
    319     'stack.h',
    320     'stdiostream.h',
    321     'stl_alloc.h',
    322     'stl_relops.h',
    323     'streambuf.h',
    324     'stream.h',
    325     'strfile.h',
    326     'strstream.h',
    327     'tempbuf.h',
    328     'tree.h',
    329     'type_traits.h',
    330     'vector.h',
    331     # 17.6.1.2 C++ library headers
    332     'algorithm',
    333     'array',
    334     'atomic',
    335     'bitset',
    336     'chrono',
    337     'codecvt',
    338     'complex',
    339     'condition_variable',
    340     'deque',
    341     'exception',
    342     'forward_list',
    343     'fstream',
    344     'functional',
    345     'future',
    346     'initializer_list',
    347     'iomanip',
    348     'ios',
    349     'iosfwd',
    350     'iostream',
    351     'istream',
    352     'iterator',
    353     'limits',
    354     'list',
    355     'locale',
    356     'map',
    357     'memory',
    358     'mutex',
    359     'new',
    360     'numeric',
    361     'ostream',
    362     'queue',
    363     'random',
    364     'ratio',
    365     'regex',
    366     'scoped_allocator',
    367     'set',
    368     'sstream',
    369     'stack',
    370     'stdexcept',
    371     'streambuf',
    372     'string',
    373     'strstream',
    374     'system_error',
    375     'thread',
    376     'tuple',
    377     'typeindex',
    378     'typeinfo',
    379     'type_traits',
    380     'unordered_map',
    381     'unordered_set',
    382     'utility',
    383     'valarray',
    384     'vector',
    385     # 17.6.1.2 C++ headers for C library facilities
    386     'cassert',
    387     'ccomplex',
    388     'cctype',
    389     'cerrno',
    390     'cfenv',
    391     'cfloat',
    392     'cinttypes',
    393     'ciso646',
    394     'climits',
    395     'clocale',
    396     'cmath',
    397     'csetjmp',
    398     'csignal',
    399     'cstdalign',
    400     'cstdarg',
    401     'cstdbool',
    402     'cstddef',
    403     'cstdint',
    404     'cstdio',
    405     'cstdlib',
    406     'cstring',
    407     'ctgmath',
    408     'ctime',
    409     'cuchar',
    410     'cwchar',
    411     'cwctype',
    412     ])
    413 
    414 # Type names
    415 _TYPES = re.compile(
    416     r'^(?:'
    417     # [dcl.type.simple]
    418     r'(char(16_t|32_t)?)|wchar_t|'
    419     r'bool|short|int|long|signed|unsigned|float|double|'
    420     # [support.types]
    421     r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|'
    422     # [cstdint.syn]
    423     r'(u?int(_fast|_least)?(8|16|32|64)_t)|'
    424     r'(u?int(max|ptr)_t)|'
    425     r')$')
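        # For instance (illustrative):
        #   _TYPES.match('uint64_t')  # matches
        #   _TYPES.match('size_t')    # matches
        #   _TYPES.match('string')    # no match; only fundamental/<cstdint> names above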
    426 
    427 
    428 # These headers are excluded from [build/include] and [build/include_order]
    429 # checks:
    430 # - Anything not following google file name conventions (containing an
    431 #   uppercase character, such as Python.h or nsStringAPI.h, for example).
    432 # - Lua headers.
    433 _THIRD_PARTY_HEADERS_PATTERN = re.compile(
    434     r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$')
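        # For instance (illustrative): 'Python.h' and 'lua.h' match this pattern and
        # are skipped by those checks, while 'stdio.h' does not match.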
    435 
    436 # Pattern for matching FileInfo.BaseName() against test file name
    437 _TEST_FILE_SUFFIX = r'(_test|_unittest|_regtest)$'
    438 
    439 # Pattern that matches only complete whitespace, possibly across multiple lines.
    440 _EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL)
    441 
    442 # Assertion macros.  These are defined in base/logging.h and
    443 # testing/base/public/gunit.h.
    444 _CHECK_MACROS = [
    445     'DCHECK', 'CHECK',
    446     'EXPECT_TRUE', 'ASSERT_TRUE',
    447     'EXPECT_FALSE', 'ASSERT_FALSE',
    448     ]
    449 
    450 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
    451 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
    452 
    453 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
    454                         ('>=', 'GE'), ('>', 'GT'),
    455                         ('<=', 'LE'), ('<', 'LT')]:
    456   _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    457   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    458   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    459   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    460 
    461 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    462                             ('>=', 'LT'), ('>', 'LE'),
    463                             ('<=', 'GT'), ('<', 'GE')]:
    464   _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    465   _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
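        # After the two loops above, the table maps each macro and operator to its
        # dedicated form, for example (illustrative):
        #   _CHECK_REPLACEMENT['CHECK']['==']        == 'CHECK_EQ'
        #   _CHECK_REPLACEMENT['EXPECT_FALSE']['<']  == 'EXPECT_GE'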
    466 
    467 # Alternative tokens and their replacements.  For full list, see section 2.5
    468 # Alternative tokens [lex.digraph] in the C++ standard.
    469 #
    470 # Digraphs (such as '%:') are not included here since it's a mess to
    471 # match those on a word boundary.
    472 _ALT_TOKEN_REPLACEMENT = {
    473     'and': '&&',
    474     'bitor': '|',
    475     'or': '||',
    476     'xor': '^',
    477     'compl': '~',
    478     'bitand': '&',
    479     'and_eq': '&=',
    480     'or_eq': '|=',
    481     'xor_eq': '^=',
    482     'not': '!',
    483     'not_eq': '!='
    484     }
    485 
    486 # Compile regular expression that matches all the above keywords.  The "[ =()]"
    487 # bit is meant to avoid matching these keywords outside of boolean expressions.
    488 #
    489 # False positives include C-style multi-line comments and multi-line strings
    490 # but those have always been troublesome for cpplint.
    491 _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    492     r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
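        # For example (illustrative), in 'if (a and not b)' this pattern matches
        # 'and' and 'not', for which _ALT_TOKEN_REPLACEMENT suggests '&&' and '!'.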
    493 
    494 
    495 # These constants define types of headers for use with
    496 # _IncludeState.CheckNextIncludeOrder().
    497 _C_SYS_HEADER = 1
    498 _CPP_SYS_HEADER = 2
    499 _LIKELY_MY_HEADER = 3
    500 _POSSIBLE_MY_HEADER = 4
    501 _OTHER_HEADER = 5
    502 
    503 # These constants define the current inline assembly state
    504 _NO_ASM = 0       # Outside of inline assembly block
    505 _INSIDE_ASM = 1   # Inside inline assembly block
    506 _END_ASM = 2      # Last line of inline assembly block
    507 _BLOCK_ASM = 3    # The whole block is an inline assembly block
    508 
    509 # Match start of assembly blocks
    510 _MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
    511                         r'(?:\s+(volatile|__volatile__))?'
    512                         r'\s*[{(]')
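        # For example (illustrative), this matches 'asm {' as well as
        # '__asm__ volatile (' at the start of a line.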
    513 
    514 # Match strings that indicate we're working on a C (not C++) file.
    515 _SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|'
    516                             r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))')
    517 
    518 # Match string that indicates we're working on a Linux Kernel file.
    519 _SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)')
    520 
    521 _regexp_compile_cache = {}
    522 
    523 # {str, set(int)}: a map from error categories to sets of linenumbers
    524 # on which those errors are expected and should be suppressed.
    525 _error_suppressions = {}
    526 
    527 # The root directory used for deriving header guard CPP variable.
    528 # This is set by --root flag.
    529 _root = None
    530 
    531 # The allowed line length of files.
    532 # This is set by --linelength flag.
    533 _line_length = 80
    534 
    535 # The allowed extensions for file names
    536 # This is set by --extensions flag.
    537 _valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh'])
    538 
    539 # {str, bool}: a map from error categories to booleans which indicate if the
    540 # category should be suppressed for every line.
    541 _global_error_suppressions = {}
    542 
    543 
    544 def ParseNolintSuppressions(filename, raw_line, linenum, error):
    545   """Updates the global list of line error-suppressions.
    546 
    547   Parses any NOLINT comments on the current line, updating the global
    548   error_suppressions store.  Reports an error if the NOLINT comment
    549   was malformed.
    550 
    551   Args:
    552     filename: str, the name of the input file.
    553     raw_line: str, the line of input text, with comments.
    554     linenum: int, the number of the current line.
    555     error: function, an error handler.
    556   """
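          # For example (illustrative): 'i = 0;  // NOLINT(runtime/int)' records the
          # current line in _error_suppressions['runtime/int']; a bare '// NOLINT' or
          # '// NOLINT(*)' suppresses all categories on the line, and NOLINTNEXTLINE
          # applies the suppression to the following line instead.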
    557   matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line)
    558   if matched:
    559     if matched.group(1):
    560       suppressed_line = linenum + 1
    561     else:
    562       suppressed_line = linenum
    563     category = matched.group(2)
    564     if category in (None, '(*)'):  # => "suppress all"
    565       _error_suppressions.setdefault(None, set()).add(suppressed_line)
    566     else:
    567       if category.startswith('(') and category.endswith(')'):
    568         category = category[1:-1]
    569         if category in _ERROR_CATEGORIES:
    570           _error_suppressions.setdefault(category, set()).add(suppressed_line)
    571         elif category not in _LEGACY_ERROR_CATEGORIES:
    572           error(filename, linenum, 'readability/nolint', 5,
    573                 'Unknown NOLINT error category: %s' % category)
    574 
    575 
    576 def ProcessGlobalSuppresions(lines):
    577   """Updates the list of global error suppressions.
    578 
    579   Parses any lint directives in the file that have global effect.
    580 
    581   Args:
    582     lines: An array of strings, each representing a line of the file, with the
    583            last element being empty if the file is terminated with a newline.
    584   """
    585   for line in lines:
    586     if _SEARCH_C_FILE.search(line):
    587       for category in _DEFAULT_C_SUPPRESSED_CATEGORIES:
    588         _global_error_suppressions[category] = True
    589     if _SEARCH_KERNEL_FILE.search(line):
    590       for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES:
    591         _global_error_suppressions[category] = True
    592 
    593 
    594 def ResetNolintSuppressions():
    595   """Resets the set of NOLINT suppressions to empty."""
    596   _error_suppressions.clear()
    597   _global_error_suppressions.clear()
    598 
    599 
    600 def IsErrorSuppressedByNolint(category, linenum):
    601   """Returns true if the specified error category is suppressed on this line.
    602 
    603   Consults the global error_suppressions map populated by
    604   ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions.
    605 
    606   Args:
    607     category: str, the category of the error.
    608     linenum: int, the current line number.
    609   Returns:
    610     bool, True iff the error should be suppressed due to a NOLINT comment or
    611     global suppression.
    612   """
    613   return (_global_error_suppressions.get(category, False) or
    614           linenum in _error_suppressions.get(category, set()) or
    615           linenum in _error_suppressions.get(None, set()))
    616 
    617 
    618 def Match(pattern, s):
    619   """Matches the string with the pattern, caching the compiled regexp."""
    620   # The regexp compilation caching is inlined in both Match and Search for
    621   # performance reasons; factoring it out into a separate function turns out
    622   # to be noticeably expensive.
    623   if pattern not in _regexp_compile_cache:
    624     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    625   return _regexp_compile_cache[pattern].match(s)
    626 
    627 
    628 def ReplaceAll(pattern, rep, s):
    629   """Replaces instances of pattern in a string with a replacement.
    630 
    631   The compiled regex is kept in a cache shared by Match and Search.
    632 
    633   Args:
    634     pattern: regex pattern
    635     rep: replacement text
    636     s: search string
    637 
    638   Returns:
    639     string with replacements made (or original string if no replacements)
    640   """
    641   if pattern not in _regexp_compile_cache:
    642     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    643   return _regexp_compile_cache[pattern].sub(rep, s)
    644 
    645 
    646 def Search(pattern, s):
    647   """Searches the string for the pattern, caching the compiled regexp."""
    648   if pattern not in _regexp_compile_cache:
    649     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    650   return _regexp_compile_cache[pattern].search(s)
    651 
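        # Example usage of the cached-regexp helpers above (illustrative):
        #   Match(r'#include', '  #include <map>')   # None; Match anchors at the start
        #   Search(r'#include', '  #include <map>')  # match found anywhere in the line
        #   ReplaceAll(r'\s+', ' ', 'a    b')         # returns 'a b'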
    652 
    653 def _IsSourceExtension(s):
    654   """File extension (excluding dot) matches a source file extension."""
    655   return s in ('c', 'cc', 'cpp', 'cxx')
    656 
    657 
    658 class _IncludeState(object):
    659   """Tracks line numbers for includes, and the order in which includes appear.
    660 
    661   include_list contains a list of lists of (header, line number) pairs.
    662   It's a list of lists rather than just one flat list to make it
    663   easier to update across preprocessor boundaries.
    664 
    665   Call CheckNextIncludeOrder() once for each header in the file, passing
    666   in the type constants defined above. Calls in an illegal order will
    667   raise an _IncludeError with an appropriate error message.
    668 
    669   """
    670   # self._section will move monotonically through this set. If it ever
    671   # needs to move backwards, CheckNextIncludeOrder will raise an error.
    672   _INITIAL_SECTION = 0
    673   _MY_H_SECTION = 1
    674   _C_SECTION = 2
    675   _CPP_SECTION = 3
    676   _OTHER_H_SECTION = 4
    677 
    678   _TYPE_NAMES = {
    679       _C_SYS_HEADER: 'C system header',
    680       _CPP_SYS_HEADER: 'C++ system header',
    681       _LIKELY_MY_HEADER: 'header this file implements',
    682       _POSSIBLE_MY_HEADER: 'header this file may implement',
    683       _OTHER_HEADER: 'other header',
    684       }
    685   _SECTION_NAMES = {
    686       _INITIAL_SECTION: "... nothing. (This can't be an error.)",
    687       _MY_H_SECTION: 'a header this file implements',
    688       _C_SECTION: 'C system header',
    689       _CPP_SECTION: 'C++ system header',
    690       _OTHER_H_SECTION: 'other header',
    691       }
    692 
    693   def __init__(self):
    694     self.include_list = [[]]
    695     self.ResetSection('')
    696 
    697   def FindHeader(self, header):
    698     """Check if a header has already been included.
    699 
    700     Args:
    701       header: header to check.
    702     Returns:
    703       Line number of previous occurrence, or -1 if the header has not
    704       been seen before.
    705     """
    706     for section_list in self.include_list:
    707       for f in section_list:
    708         if f[0] == header:
    709           return f[1]
    710     return -1
    711 
    712   def ResetSection(self, directive):
    713     """Reset section checking for preprocessor directive.
    714 
    715     Args:
    716       directive: preprocessor directive (e.g. "if", "else").
    717     """
    718     # The name of the current section.
    719     self._section = self._INITIAL_SECTION
    720     # The path of last found header.
    721     self._last_header = ''
    722 
    723     # Update list of includes.  Note that we never pop from the
    724     # include list.
    725     if directive in ('if', 'ifdef', 'ifndef'):
    726       self.include_list.append([])
    727     elif directive in ('else', 'elif'):
    728       self.include_list[-1] = []
    729 
    730   def SetLastHeader(self, header_path):
    731     self._last_header = header_path
    732 
    733   def CanonicalizeAlphabeticalOrder(self, header_path):
    734     """Returns a path canonicalized for alphabetical comparison.
    735 
    736     - replaces "-" with "_" so they both compare the same.
    737     - removes '-inl' since we don't require them to be after the main header.
    738     - lowercase everything, just in case.
    739 
    740     Args:
    741       header_path: Path to be canonicalized.
    742 
    743     Returns:
    744       Canonicalized path.
    745     """
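            # For example (illustrative): 'foo-bar-inl.h' canonicalizes to 'foo_bar.h'.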
    746     return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
    747 
    748   def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path):
    749     """Check if a header is in alphabetical order with the previous header.
    750 
    751     Args:
    752       clean_lines: A CleansedLines instance containing the file.
    753       linenum: The number of the line to check.
    754       header_path: Canonicalized header to be checked.
    755 
    756     Returns:
    757       Returns true if the header is in alphabetical order.
    758     """
    759     # If previous section is different from current section, _last_header will
    760     # be reset to empty string, so it's always less than current header.
    761     #
    762     # If previous line was a blank line, assume that the headers are
    763     # intentionally sorted the way they are.
    764     if (self._last_header > header_path and
    765         Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])):
    766       return False
    767     return True
    768 
    769   def CheckNextIncludeOrder(self, header_type):
    770     """Returns a non-empty error message if the next header is out of order.
    771 
    772     This function also updates the internal state to be ready to check
    773     the next include.
    774 
    775     Args:
    776       header_type: One of the _XXX_HEADER constants defined above.
    777 
    778     Returns:
    779       The empty string if the header is in the right order, or an
    780       error message describing what's wrong.
    781 
    782     """
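            # For example (illustrative): once a _CPP_SYS_HEADER has advanced the
            # section to _CPP_SECTION, a later _C_SYS_HEADER is out of order and
            # yields 'Found C system header after C++ system header'.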
    783     error_message = ('Found %s after %s' %
    784                      (self._TYPE_NAMES[header_type],
    785                       self._SECTION_NAMES[self._section]))
    786 
    787     last_section = self._section
    788 
    789     if header_type == _C_SYS_HEADER:
    790       if self._section <= self._C_SECTION:
    791         self._section = self._C_SECTION
    792       else:
    793         self._last_header = ''
    794         return error_message
    795     elif header_type == _CPP_SYS_HEADER:
    796       if self._section <= self._CPP_SECTION:
    797         self._section = self._CPP_SECTION
    798       else:
    799         self._last_header = ''
    800         return error_message
    801     elif header_type == _LIKELY_MY_HEADER:
    802       if self._section <= self._MY_H_SECTION:
    803         self._section = self._MY_H_SECTION
    804       else:
    805         self._section = self._OTHER_H_SECTION
    806     elif header_type == _POSSIBLE_MY_HEADER:
    807       if self._section <= self._MY_H_SECTION:
    808         self._section = self._MY_H_SECTION
    809       else:
    810         # This will always be the fallback because we're not sure
    811         # enough that the header is associated with this file.
    812         self._section = self._OTHER_H_SECTION
    813     else:
    814       assert header_type == _OTHER_HEADER
    815       self._section = self._OTHER_H_SECTION
    816 
    817     if last_section != self._section:
    818       self._last_header = ''
    819 
    820     return ''
    821 
    822 
    823 class _CppLintState(object):
    824   """Maintains module-wide state."""
    825 
    826   def __init__(self):
    827     self.verbose_level = 1  # global setting.
    828     self.error_count = 0    # global count of reported errors
    829     # filters to apply when emitting error messages
    830     self.filters = _DEFAULT_FILTERS[:]
    831     # backup of filter list. Used to restore the state after each file.
    832     self._filters_backup = self.filters[:]
    833     self.counting = 'total'  # In what way are we counting errors?
    834     self.errors_by_category = {}  # string to int dict storing error counts
    835 
    836     # output format:
    837     # "emacs" - format that emacs can parse (default)
    838     # "vs7" - format that Microsoft Visual Studio 7 can parse
    839     self.output_format = 'emacs'
    840 
    841   def SetOutputFormat(self, output_format):
    842     """Sets the output format for errors."""
    843     self.output_format = output_format
    844 
    845   def SetVerboseLevel(self, level):
    846     """Sets the module's verbosity, and returns the previous setting."""
    847     last_verbose_level = self.verbose_level
    848     self.verbose_level = level
    849     return last_verbose_level
    850 
    851   def SetCountingStyle(self, counting_style):
    852     """Sets the module's counting options."""
    853     self.counting = counting_style
    854 
    855   def SetFilters(self, filters):
    856     """Sets the error-message filters.
    857 
    858     These filters are applied when deciding whether to emit a given
    859     error message.
    860 
    861     Args:
    862       filters: A string of comma-separated filters (eg "+whitespace/indent").
    863                Each filter should start with + or -; else we die.
    864 
    865     Raises:
    866       ValueError: The comma-separated filters did not all start with '+' or '-'.
    867                   E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    868     """
    869     # Default filters always have less priority than the flag ones.
    870     self.filters = _DEFAULT_FILTERS[:]
    871     self.AddFilters(filters)
    872 
    873   def AddFilters(self, filters):
    874     """ Adds more filters to the existing list of error-message filters. """
    875     for filt in filters.split(','):
    876       clean_filt = filt.strip()
    877       if clean_filt:
    878         self.filters.append(clean_filt)
    879     for filt in self.filters:
    880       if not (filt.startswith('+') or filt.startswith('-')):
    881         raise ValueError('Every filter in --filters must start with + or -'
    882                          ' (%s does not)' % filt)
    883 
    884   def BackupFilters(self):
    885     """ Saves the current filter list to backup storage."""
    886     self._filters_backup = self.filters[:]
    887 
    888   def RestoreFilters(self):
    889     """ Restores filters previously backed up."""
    890     self.filters = self._filters_backup[:]
    891 
    892   def ResetErrorCounts(self):
    893     """Sets the module's error statistic back to zero."""
    894     self.error_count = 0
    895     self.errors_by_category = {}
    896 
    897   def IncrementErrorCount(self, category):
    898     """Bumps the module's error statistic."""
    899     self.error_count += 1
    900     if self.counting in ('toplevel', 'detailed'):
    901       if self.counting != 'detailed':
    902         category = category.split('/')[0]
    903       if category not in self.errors_by_category:
    904         self.errors_by_category[category] = 0
    905       self.errors_by_category[category] += 1
    906 
    907   def PrintErrorCounts(self):
    908     """Print a summary of errors by category, and the total."""
    909     for category, count in self.errors_by_category.iteritems():
    910       sys.stderr.write('Category \'%s\' errors found: %d\n' %
    911                        (category, count))
    912     sys.stderr.write('Total errors found: %d\n' % self.error_count)
    913 
    914 _cpplint_state = _CppLintState()
    915 
    916 
    917 def _OutputFormat():
    918   """Gets the module's output format."""
    919   return _cpplint_state.output_format
    920 
    921 
    922 def _SetOutputFormat(output_format):
    923   """Sets the module's output format."""
    924   _cpplint_state.SetOutputFormat(output_format)
    925 
    926 
    927 def _VerboseLevel():
    928   """Returns the module's verbosity setting."""
    929   return _cpplint_state.verbose_level
    930 
    931 
    932 def _SetVerboseLevel(level):
    933   """Sets the module's verbosity, and returns the previous setting."""
    934   return _cpplint_state.SetVerboseLevel(level)
    935 
    936 
    937 def _SetCountingStyle(level):
    938   """Sets the module's counting options."""
    939   _cpplint_state.SetCountingStyle(level)
    940 
    941 
    942 def _Filters():
    943   """Returns the module's list of output filters, as a list."""
    944   return _cpplint_state.filters
    945 
    946 
    947 def _SetFilters(filters):
    948   """Sets the module's error-message filters.
    949 
    950   These filters are applied when deciding whether to emit a given
    951   error message.
    952 
    953   Args:
    954     filters: A string of comma-separated filters (eg "whitespace/indent").
    955              Each filter should start with + or -; else we die.
    956   """
    957   _cpplint_state.SetFilters(filters)
    958 
    959 def _AddFilters(filters):
    960   """Adds more filter overrides.
    961 
    962   Unlike _SetFilters, this function does not reset the current list of filters
    963   available.
    964 
    965   Args:
    966     filters: A string of comma-separated filters (eg "whitespace/indent").
    967              Each filter should start with + or -; else we die.
    968   """
    969   _cpplint_state.AddFilters(filters)
    970 
    971 def _BackupFilters():
    972   """ Saves the current filter list to backup storage."""
    973   _cpplint_state.BackupFilters()
    974 
    975 def _RestoreFilters():
    976   """ Restores filters previously backed up."""
    977   _cpplint_state.RestoreFilters()
    978 
    979 class _FunctionState(object):
    980   """Tracks current function name and the number of lines in its body."""
    981 
    982   _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    983   _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    984 
    985   def __init__(self):
    986     self.in_a_function = False
    987     self.lines_in_function = 0
    988     self.current_function = ''
    989 
    990   def Begin(self, function_name):
    991     """Start analyzing function body.
    992 
    993     Args:
    994       function_name: The name of the function being tracked.
    995     """
    996     self.in_a_function = True
    997     self.lines_in_function = 0
    998     self.current_function = function_name
    999 
   1000   def Count(self):
   1001     """Count line in current function body."""
   1002     if self.in_a_function:
   1003       self.lines_in_function += 1
   1004 
   1005   def Check(self, error, filename, linenum):
   1006     """Report if too many lines in function body.
   1007 
   1008     Args:
   1009       error: The function to call with any errors found.
   1010       filename: The name of the current file.
   1011       linenum: The number of the line to check.
   1012     """
   1013     if not self.in_a_function:
   1014       return
   1015 
   1016     if Match(r'T(EST|est)', self.current_function):
   1017       base_trigger = self._TEST_TRIGGER
   1018     else:
   1019       base_trigger = self._NORMAL_TRIGGER
   1020     trigger = base_trigger * 2**_VerboseLevel()
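            # For example (illustrative): at --v=0 a normal function is flagged above
            # 250 non-comment lines (400 for test functions); each additional
            # verbosity level doubles the threshold.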
   1021 
   1022     if self.lines_in_function > trigger:
   1023       error_level = int(math.log(self.lines_in_function / base_trigger, 2))
   1024       # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
   1025       if error_level > 5:
   1026         error_level = 5
   1027       error(filename, linenum, 'readability/fn_size', error_level,
   1028             'Small and focused functions are preferred:'
   1029             ' %s has %d non-comment lines'
   1030             ' (error triggered by exceeding %d lines).'  % (
   1031                 self.current_function, self.lines_in_function, trigger))
   1032 
   1033   def End(self):
   1034     """Stop analyzing function body."""
   1035     self.in_a_function = False
   1036 
   1037 
   1038 class _IncludeError(Exception):
   1039   """Indicates a problem with the include order in a file."""
   1040   pass
   1041 
   1042 
   1043 class FileInfo(object):
   1044   """Provides utility functions for filenames.
   1045 
   1046   FileInfo provides easy access to the components of a file's path
   1047   relative to the project root.
   1048   """
   1049 
   1050   def __init__(self, filename):
   1051     self._filename = filename
   1052 
   1053   def FullName(self):
   1054     """Make Windows paths like Unix."""
   1055     return os.path.abspath(self._filename).replace('\\', '/')
   1056 
   1057   def RepositoryName(self):
   1058     """FullName after removing the local path to the repository.
   1059 
   1060     If we have a real absolute path name here we can try to do something smart:
   1061     detecting the root of the checkout and truncating /path/to/checkout from
   1062     the name so that we get header guards that don't include things like
   1063     "C:\Documents and Settings\..." or "/home/username/..." in them and thus
   1064     people on different computers who have checked the source out to different
   1065     locations won't see bogus errors.
   1066     """
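            # For example (illustrative): if /home/user/proj/src/.git exists, then
            # /home/user/proj/src/chrome/browser.h is reported as 'chrome/browser.h'.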
   1067     fullname = self.FullName()
   1068 
   1069     if os.path.exists(fullname):
   1070       project_dir = os.path.dirname(fullname)
   1071 
   1072       if os.path.exists(os.path.join(project_dir, ".svn")):
   1073         # If there's a .svn file in the current directory, we recursively look
   1074         # up the directory tree for the top of the SVN checkout
   1075         root_dir = project_dir
   1076         one_up_dir = os.path.dirname(root_dir)
   1077         while os.path.exists(os.path.join(one_up_dir, ".svn")):
   1078           root_dir = os.path.dirname(root_dir)
   1079           one_up_dir = os.path.dirname(one_up_dir)
   1080 
   1081         prefix = os.path.commonprefix([root_dir, project_dir])
   1082         return fullname[len(prefix) + 1:]
   1083 
   1084       # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
   1085       # searching up from the current path.
   1086       root_dir = current_dir = os.path.dirname(fullname)
   1087       while current_dir != os.path.dirname(current_dir):
   1088         if (os.path.exists(os.path.join(current_dir, ".git")) or
   1089             os.path.exists(os.path.join(current_dir, ".hg")) or
   1090             os.path.exists(os.path.join(current_dir, ".svn"))):
   1091           root_dir = current_dir
   1092         current_dir = os.path.dirname(current_dir)
   1093 
   1094       if (os.path.exists(os.path.join(root_dir, ".git")) or
   1095           os.path.exists(os.path.join(root_dir, ".hg")) or
   1096           os.path.exists(os.path.join(root_dir, ".svn"))):
   1097         prefix = os.path.commonprefix([root_dir, project_dir])
   1098         return fullname[len(prefix) + 1:]
   1099 
   1100     # Don't know what to do; header guard warnings may be wrong...
   1101     return fullname
   1102 
   1103   def Split(self):
   1104     """Splits the file into the directory, basename, and extension.
   1105 
   1106     For 'chrome/browser/browser.cc', Split() would
   1107     return ('chrome/browser', 'browser', '.cc')
   1108 
   1109     Returns:
   1110       A tuple of (directory, basename, extension).
   1111     """
   1112 
   1113     googlename = self.RepositoryName()
   1114     project, rest = os.path.split(googlename)
   1115     return (project,) + os.path.splitext(rest)
   1116 
   1117   def BaseName(self):
   1118     """File base name - text after the final slash, before the final period."""
   1119     return self.Split()[1]
   1120 
   1121   def Extension(self):
   1122     """File extension - text following the final period."""
   1123     return self.Split()[2]
   1124 
   1125   def NoExtension(self):
   1126     """File path (directory and base name) without the extension."""
   1127     return '/'.join(self.Split()[0:2])
   1128 
   1129   def IsSource(self):
   1130     """File has a source file extension."""
   1131     return _IsSourceExtension(self.Extension()[1:])
   1132 
   1133 
   1134 def _ShouldPrintError(category, confidence, linenum):
   1135   """If confidence >= verbose, category passes filter and is not suppressed."""
   1136 
   1137   # There are three ways we might decide not to print an error message:
   1138   # a "NOLINT(category)" comment appears in the source,
   1139   # the verbosity level isn't high enough, or the filters filter it out.
   1140   if IsErrorSuppressedByNolint(category, linenum):
   1141     return False
   1142 
   1143   if confidence < _cpplint_state.verbose_level:
   1144     return False
   1145 
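          # For example (illustrative): with --filter=-whitespace,+whitespace/braces,
          # a 'whitespace/indent' error is dropped while 'whitespace/braces' is kept,
          # because the filters are applied left to right.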
   1146   is_filtered = False
   1147   for one_filter in _Filters():
   1148     if one_filter.startswith('-'):
   1149       if category.startswith(one_filter[1:]):
   1150         is_filtered = True
   1151     elif one_filter.startswith('+'):
   1152       if category.startswith(one_filter[1:]):
   1153         is_filtered = False
   1154     else:
   1155       assert False  # should have been checked for in SetFilter.
   1156   if is_filtered:
   1157     return False
   1158 
   1159   return True
   1160 
   1161 
   1162 def Error(filename, linenum, category, confidence, message):
   1163   """Logs the fact we've found a lint error.
   1164 
   1165   We log where the error was found, and also our confidence in the error,
   1166   that is, how certain we are this is a legitimate style regression, and
   1167   not a misidentification or a use that's sometimes justified.
   1168 
   1169   False positives can be suppressed by the use of
   1170   "NOLINT(category)" comments on the offending line.  These are
   1171   parsed into _error_suppressions.
   1172 
   1173   Args:
   1174     filename: The name of the file containing the error.
   1175     linenum: The number of the line containing the error.
   1176     category: A string used to describe the "category" this bug
   1177       falls under: "whitespace", say, or "runtime".  Categories
   1178       may have a hierarchy separated by slashes: "whitespace/indent".
   1179     confidence: A number from 1-5 representing a confidence score for
   1180       the error, with 5 meaning that we are certain of the problem,
   1181       and 1 meaning that it could be a legitimate construct.
   1182     message: The error message.
   1183   """
   1184   if _ShouldPrintError(category, confidence, linenum):
   1185     _cpplint_state.IncrementErrorCount(category)
   1186     if _cpplint_state.output_format == 'vs7':
   1187       sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' % (
   1188           filename, linenum, message, category, confidence))
   1189     elif _cpplint_state.output_format == 'eclipse':
   1190       sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' % (
   1191           filename, linenum, message, category, confidence))
   1192     else:
   1193       sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' % (
   1194           filename, linenum, message, category, confidence))
   1195 
   1196 
   1197 # Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
   1198 _RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
   1199     r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
   1200 # Match a single C style comment on the same line.
   1201 _RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/'
   1202 # Matches multi-line C style comments.
   1203 # This RE is a little bit more complicated than one might expect, because we
   1204 # have to take care of space removal so we can handle comments inside
   1205 # statements better.
   1206 # The current rule is: We only clear spaces from both sides when we're at the
   1207 # end of the line. Otherwise, we try to remove spaces from the right side,
   1208 # if that doesn't work we try the left side, but only if there's a non-word
   1209 # character on the right.
   1210 _RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
   1211     r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' +
   1212     _RE_PATTERN_C_COMMENTS + r'\s+|' +
   1213     r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' +
   1214     _RE_PATTERN_C_COMMENTS + r')')
   1215 
   1216 
   1217 def IsCppString(line):
   1218   """Does the line end so that the next character is inside a string constant?
   1219
   1220   This function does not consider single-line or multi-line comments.
   1221
   1222   Args:
   1223     line: a partial line of code, from character 0 up to some position n.
   1224 
   1225   Returns:
   1226     True, if next character appended to 'line' is inside a
   1227     string constant.
   1228   """
   1229 
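          # For example (illustrative): IsCppString('x = "abc') is True (the quote is
          # unmatched), while IsCppString('x = "abc"') is False.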
   1230   line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
   1231   return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
   1232 
   1233 
   1234 def CleanseRawStrings(raw_lines):
   1235   """Removes C++11 raw strings from lines.
   1236 
   1237     Before:
   1238       static const char kData[] = R"(
   1239           multi-line string
   1240           )";
   1241 
   1242     After:
   1243       static const char kData[] = ""
   1244           (replaced by blank line)
   1245           "";
   1246 
   1247   Args:
   1248     raw_lines: list of raw lines.
   1249 
   1250   Returns:
   1251     list of lines with C++11 raw strings replaced by empty strings.
   1252   """
   1253 
   1254   delimiter = None
   1255   lines_without_raw_strings = []
   1256   for line in raw_lines:
   1257     if delimiter:
   1258       # Inside a raw string, look for the end
   1259       end = line.find(delimiter)
   1260       if end >= 0:
   1261         # Found the end of the string, match leading space for this
   1262         # line and resume copying the original lines, and also insert
   1263         # a "" on the last line.
   1264         leading_space = Match(r'^(\s*)\S', line)
   1265         line = leading_space.group(1) + '""' + line[end + len(delimiter):]
   1266         delimiter = None
   1267       else:
   1268         # Haven't found the end yet, append a blank line.
   1269         line = '""'
   1270 
   1271     # Look for beginning of a raw string, and replace them with
   1272     # empty strings.  This is done in a loop to handle multiple raw
   1273     # strings on the same line.
   1274     while delimiter is None:
   1275       # Look for beginning of a raw string.
   1276       # See 2.14.15 [lex.string] for syntax.
   1277       #
   1278       # Once we have matched a raw string, we check the prefix of the
   1279       # line to make sure that the line is not part of a single line
   1280       # comment.  It's done this way because we remove raw strings
   1281       # before removing comments as opposed to removing comments
   1282       # before removing raw strings.  This is because there are some
   1283       # cpplint checks that requires the comments to be preserved, but
   1284       # we don't want to check comments that are inside raw strings.
   1285       matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line)
   1286       if (matched and
   1287           not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//',
   1288                     matched.group(1))):
   1289         delimiter = ')' + matched.group(2) + '"'
   1290 
   1291         end = matched.group(3).find(delimiter)
   1292         if end >= 0:
   1293           # Raw string ended on same line
   1294           line = (matched.group(1) + '""' +
   1295                   matched.group(3)[end + len(delimiter):])
   1296           delimiter = None
   1297         else:
   1298           # Start of a multi-line raw string
   1299           line = matched.group(1) + '""'
   1300       else:
   1301         break
   1302 
   1303     lines_without_raw_strings.append(line)
   1304 
   1305   # TODO(unknown): if delimiter is not None here, we might want to
   1306   # emit a warning for unterminated string.
   1307   return lines_without_raw_strings
   1308 
   1309 
   1310 def FindNextMultiLineCommentStart(lines, lineix):
   1311   """Find the beginning marker for a multiline comment."""
   1312   while lineix < len(lines):
   1313     if lines[lineix].strip().startswith('/*'):
   1314       # Only return this marker if the comment goes beyond this line
   1315       if lines[lineix].strip().find('*/', 2) < 0:
   1316         return lineix
   1317     lineix += 1
   1318   return len(lines)
   1319 
   1320 
   1321 def FindNextMultiLineCommentEnd(lines, lineix):
   1322   """We are inside a comment, find the end marker."""
   1323   while lineix < len(lines):
   1324     if lines[lineix].strip().endswith('*/'):
   1325       return lineix
   1326     lineix += 1
   1327   return len(lines)
   1328 
   1329 
   1330 def RemoveMultiLineCommentsFromRange(lines, begin, end):
   1331   """Clears a range of lines for multi-line comments."""
   1332   # Having /**/ dummy comments makes the lines non-empty, so we will not get
   1333   # unnecessary blank line warnings later in the code.
   1334   for i in range(begin, end):
   1335     lines[i] = '/**/'
   1336 
   1337 
   1338 def RemoveMultiLineComments(filename, lines, error):
   1339   """Removes multiline (c-style) comments from lines."""
   1340   lineix = 0
   1341   while lineix < len(lines):
   1342     lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
   1343     if lineix_begin >= len(lines):
   1344       return
   1345     lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
   1346     if lineix_end >= len(lines):
   1347       error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
   1348             'Could not find end of multi-line comment')
   1349       return
   1350     RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
   1351     lineix = lineix_end + 1
   1352 
   1353 
   1354 def CleanseComments(line):
   1355   """Removes //-comments and single-line C-style /* */ comments.
   1356 
   1357   Args:
   1358     line: A line of C++ source.
   1359 
   1360   Returns:
   1361     The line with single-line comments removed.
   1362   """
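          # For example (illustrative):
          #   CleanseComments('int x;  // trailing')  ->  'int x;'
          #   CleanseComments('a /* b */ c')          ->  'a c'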
   1363   commentpos = line.find('//')
   1364   if commentpos != -1 and not IsCppString(line[:commentpos]):
   1365     line = line[:commentpos].rstrip()
   1366   # get rid of /* ... */
   1367   return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
   1368 
   1369 
   1370 class CleansedLines(object):
   1371   """Holds 4 copies of all lines with different preprocessing applied to them.
   1372 
   1373   1) elided member contains lines without strings and comments.
   1374   2) lines member contains lines without comments.
   1375   3) raw_lines member contains all the lines without processing.
   1376   4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
   1377      strings removed.
   1378   All these members are of <type 'list'>, and of the same length.
   1379   """
   1380 
   1381   def __init__(self, lines):
   1382     self.elided = []
   1383     self.lines = []
   1384     self.raw_lines = lines
   1385     self.num_lines = len(lines)
   1386     self.lines_without_raw_strings = CleanseRawStrings(lines)
   1387     for linenum in range(len(self.lines_without_raw_strings)):
   1388       self.lines.append(CleanseComments(
   1389           self.lines_without_raw_strings[linenum]))
   1390       elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
   1391       self.elided.append(CleanseComments(elided))
   1392 
   1393   def NumLines(self):
   1394     """Returns the number of lines represented."""
   1395     return self.num_lines
   1396 
   1397   @staticmethod
   1398   def _CollapseStrings(elided):
   1399     """Collapses strings and chars on a line to simple "" or '' blocks.
   1400 
   1401     We nix strings first so we're not fooled by text like '"http://"'
   1402 
   1403     Args:
   1404       elided: The line being processed.
   1405 
   1406     Returns:
   1407       The line with collapsed strings.
   1408     """
   1409     if _RE_PATTERN_INCLUDE.match(elided):
   1410       return elided
   1411 
   1412     # Remove escaped characters first to make quote/single quote collapsing
   1413     # Remove escaped characters first to keep quote/single quote collapsing
   1414     # simple.  Things that look like escaped characters shouldn't occur
   1415     elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
   1416 
   1417     # Replace quoted strings and digit separators.  Both single quotes
   1418     # and double quotes are processed in the same loop, otherwise
   1419     # nested quotes wouldn't work.
   1420     collapsed = ''
   1421     while True:
   1422       # Find the first quote character
   1423       match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
   1424       if not match:
   1425         collapsed += elided
   1426         break
   1427       head, quote, tail = match.groups()
   1428 
   1429       if quote == '"':
   1430         # Collapse double quoted strings
   1431         second_quote = tail.find('"')
   1432         if second_quote >= 0:
   1433           collapsed += head + '""'
   1434           elided = tail[second_quote + 1:]
   1435         else:
   1436           # Unmatched double quote, don't bother processing the rest
   1437           # of the line since this is probably a multiline string.
   1438           collapsed += elided
   1439           break
   1440       else:
   1441         # Found single quote, check nearby text to eliminate digit separators.
   1442         #
   1443         # There is no special handling for floating point here, because
   1444         # the integer/fractional/exponent parts would all be parsed
   1445         # correctly as long as there are digits on both sides of the
   1446         # separator.  So we are fine as long as we don't see something
   1447         # like "0.'3" (gcc 4.9.0 will not allow this literal).
   1448         if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
   1449           match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
   1450           collapsed += head + match_literal.group(1).replace("'", '')
   1451           elided = match_literal.group(2)
   1452         else:
   1453           second_quote = tail.find('\'')
   1454           if second_quote >= 0:
   1455             collapsed += head + "''"
   1456             elided = tail[second_quote + 1:]
   1457           else:
   1458             # Unmatched single quote
   1459             collapsed += elided
   1460             break
   1461 
   1462     return collapsed
   1463 
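# Illustrative examples of CleansedLines._CollapseStrings() above (a rough
# sketch of the expected results):
#   _CollapseStrings('a = "some text";')    ->  'a = "";'
#   _CollapseStrings("if (c == 'x') foo;")  ->  "if (c == '') foo;"
#   _CollapseStrings("int n = 1'000'000;")  ->  'int n = 1000000;'
# The last case shows digit separators being folded back into a plain numeric
# literal instead of being mistaken for character constants.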
   1464 
   1465 def FindEndOfExpressionInLine(line, startpos, stack):
   1466   """Find the position just after the end of current parenthesized expression.
   1467 
   1468   Args:
   1469     line: a CleansedLines line.
   1470     startpos: start searching at this position.
   1471     stack: nesting stack at startpos.
   1472 
   1473   Returns:
   1474     On finding matching end: (index just after matching end, None)
   1475     On finding an unclosed expression: (-1, None)
   1476     Otherwise: (-1, new stack at end of this line)
   1477   """
   1478   for i in xrange(startpos, len(line)):
   1479     char = line[i]
   1480     if char in '([{':
   1481       # Found start of parenthesized expression, push to expression stack
   1482       stack.append(char)
   1483     elif char == '<':
   1484       # Found potential start of template argument list
   1485       if i > 0 and line[i - 1] == '<':
   1486         # Left shift operator
   1487         if stack and stack[-1] == '<':
   1488           stack.pop()
   1489           if not stack:
   1490             return (-1, None)
   1491       elif i > 0 and Search(r'\boperator\s*$', line[0:i]):
   1492         # operator<, don't add to stack
   1493         continue
   1494       else:
   1495         # Tentative start of template argument list
   1496         stack.append('<')
   1497     elif char in ')]}':
   1498       # Found end of parenthesized expression.
   1499       #
   1500       # If we are currently expecting a matching '>', the pending '<'
   1501       # must have been an operator.  Remove them from expression stack.
   1502       while stack and stack[-1] == '<':
   1503         stack.pop()
   1504       if not stack:
   1505         return (-1, None)
   1506       if ((stack[-1] == '(' and char == ')') or
   1507           (stack[-1] == '[' and char == ']') or
   1508           (stack[-1] == '{' and char == '}')):
   1509         stack.pop()
   1510         if not stack:
   1511           return (i + 1, None)
   1512       else:
   1513         # Mismatched parentheses
   1514         return (-1, None)
   1515     elif char == '>':
   1516       # Found potential end of template argument list.
   1517 
   1518       # Ignore "->" and operator functions
   1519       if (i > 0 and
   1520           (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))):
   1521         continue
   1522 
   1523       # Pop the stack if there is a matching '<'.  Otherwise, ignore
   1524       # this '>' since it must be an operator.
   1525       if stack:
   1526         if stack[-1] == '<':
   1527           stack.pop()
   1528           if not stack:
   1529             return (i + 1, None)
   1530     elif char == ';':
   1531       # Found something that looks like the end of a statement.  If we are
   1532       # currently expecting a '>', the matching '<' must have been an operator,
   1533       # since a template argument list should not contain statements.
   1534       while stack and stack[-1] == '<':
   1535         stack.pop()
   1536       if not stack:
   1537         return (-1, None)
   1538 
   1539   # Did not find end of expression or unbalanced parentheses on this line
   1540   return (-1, stack)
   1541 
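# Illustrative examples of FindEndOfExpressionInLine() above (a rough sketch):
#   FindEndOfExpressionInLine('foo(bar, baz) + 1', 3, [])  ->  (13, None)
#       index 13 is just past the closing ')'
#   FindEndOfExpressionInLine('foo(bar,', 3, [])           ->  (-1, ['('])
#       no match on this line; the returned stack is passed to the next line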
   1542 
   1543 def CloseExpression(clean_lines, linenum, pos):
   1544   """If input points to ( or { or [ or <, finds the position that closes it.
   1545 
   1546   If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the
   1547   linenum/pos that correspond to the closing of the expression.
   1548 
   1549   TODO(unknown): cpplint spends a fair bit of time matching parentheses.
   1550   Ideally we would want to index all opening and closing parentheses once
   1551   and have CloseExpression be just a simple lookup, but due to preprocessor
   1552   tricks, this is not so easy.
   1553 
   1554   Args:
   1555     clean_lines: A CleansedLines instance containing the file.
   1556     linenum: The number of the line to check.
   1557     pos: A position on the line.
   1558 
   1559   Returns:
   1560     A tuple (line, linenum, pos) pointer *past* the closing brace, or
   1561     (line, len(lines), -1) if we never find a close.  Note we ignore
   1562     strings and comments when matching; and the line we return is the
   1563     'cleansed' line at linenum.
   1564   """
   1565 
   1566   line = clean_lines.elided[linenum]
   1567   if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]):
   1568     return (line, clean_lines.NumLines(), -1)
   1569 
   1570   # Check first line
   1571   (end_pos, stack) = FindEndOfExpressionInLine(line, pos, [])
   1572   if end_pos > -1:
   1573     return (line, linenum, end_pos)
   1574 
   1575   # Continue scanning forward
   1576   while stack and linenum < clean_lines.NumLines() - 1:
   1577     linenum += 1
   1578     line = clean_lines.elided[linenum]
   1579     (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack)
   1580     if end_pos > -1:
   1581       return (line, linenum, end_pos)
   1582 
   1583   # Did not find end of expression before end of file, give up
   1584   return (line, clean_lines.NumLines(), -1)
   1585 
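# Illustrative example of CloseExpression() spanning two lines (a rough
# sketch; the leading '' mimics the dummy line cpplint prepends so that
# indices line up with line numbers):
#   clean = CleansedLines(['', 'Foo(bar,', '    baz);'])
#   CloseExpression(clean, 1, 3)  ->  ('    baz);', 2, 8)
#       the ')' matching the '(' at (line 1, pos 3) is at (line 2, pos 7),
#       and the returned pos 8 points just past it.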
   1586 
   1587 def FindStartOfExpressionInLine(line, endpos, stack):
   1588   """Find position at the matching start of current expression.
   1589 
   1590   This is almost the reverse of FindEndOfExpressionInLine, but note
   1591   that the input position and returned position differ by 1.
   1592 
   1593   Args:
   1594     line: a CleansedLines line.
   1595     endpos: start searching at this position.
   1596     stack: nesting stack at endpos.
   1597 
   1598   Returns:
   1599     On finding matching start: (index at matching start, None)
   1600     On finding an unclosed expression: (-1, None)
   1601     Otherwise: (-1, new stack at beginning of this line)
   1602   """
   1603   i = endpos
   1604   while i >= 0:
   1605     char = line[i]
   1606     if char in ')]}':
   1607       # Found end of expression, push to expression stack
   1608       stack.append(char)
   1609     elif char == '>':
   1610       # Found potential end of template argument list.
   1611       #
   1612       # Ignore it if it's a "->" or ">=" or "operator>"
   1613       if (i > 0 and
   1614           (line[i - 1] == '-' or
   1615            Match(r'\s>=\s', line[i - 1:]) or
   1616            Search(r'\boperator\s*$', line[0:i]))):
   1617         i -= 1
   1618       else:
   1619         stack.append('>')
   1620     elif char == '<':
   1621       # Found potential start of template argument list
   1622       if i > 0 and line[i - 1] == '<':
   1623         # Left shift operator
   1624         i -= 1
   1625       else:
   1626         # If there is a matching '>', we can pop the expression stack.
   1627         # Otherwise, ignore this '<' since it must be an operator.
   1628         if stack and stack[-1] == '>':
   1629           stack.pop()
   1630           if not stack:
   1631             return (i, None)
   1632     elif char in '([{':
   1633       # Found start of expression.
   1634       #
   1635       # If there are any unmatched '>' on the stack, they must be
   1636       # operators.  Remove those.
   1637       while stack and stack[-1] == '>':
   1638         stack.pop()
   1639       if not stack:
   1640         return (-1, None)
   1641       if ((char == '(' and stack[-1] == ')') or
   1642           (char == '[' and stack[-1] == ']') or
   1643           (char == '{' and stack[-1] == '}')):
   1644         stack.pop()
   1645         if not stack:
   1646           return (i, None)
   1647       else:
   1648         # Mismatched parentheses
   1649         return (-1, None)
   1650     elif char == ';':
   1651       # Found something that looks like the end of a statement.  If we are
   1652       # currently expecting a '<', the matching '>' must have been an operator,
   1653       # since a template argument list should not contain statements.
   1654       while stack and stack[-1] == '>':
   1655         stack.pop()
   1656       if not stack:
   1657         return (-1, None)
   1658 
   1659     i -= 1
   1660 
   1661   return (-1, stack)
   1662 
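# Illustrative example of FindStartOfExpressionInLine() above (a rough sketch):
#   FindStartOfExpressionInLine('foo(bar, baz)', 12, [])  ->  (3, None)
# Unlike FindEndOfExpressionInLine, the returned index points *at* the
# matching '(' rather than one past it.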
   1663 
   1664 def ReverseCloseExpression(clean_lines, linenum, pos):
   1665   """If input points to ) or } or ] or >, finds the position that opens it.
   1666 
   1667   If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the
   1668   linenum/pos that correspond to the opening of the expression.
   1669 
   1670   Args:
   1671     clean_lines: A CleansedLines instance containing the file.
   1672     linenum: The number of the line to check.
   1673     pos: A position on the line.
   1674 
   1675   Returns:
   1676     A tuple (line, linenum, pos) pointer *at* the opening brace, or
   1677     (line, 0, -1) if we never find the matching opening brace.  Note
   1678     we ignore strings and comments when matching; and the line we
   1679     return is the 'cleansed' line at linenum.
   1680   """
   1681   line = clean_lines.elided[linenum]
   1682   if line[pos] not in ')}]>':
   1683     return (line, 0, -1)
   1684 
   1685   # Check last line
   1686   (start_pos, stack) = FindStartOfExpressionInLine(line, pos, [])
   1687   if start_pos > -1:
   1688     return (line, linenum, start_pos)
   1689 
   1690   # Continue scanning backward
   1691   while stack and linenum > 0:
   1692     linenum -= 1
   1693     line = clean_lines.elided[linenum]
   1694     (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack)
   1695     if start_pos > -1:
   1696       return (line, linenum, start_pos)
   1697 
   1698   # Did not find start of expression before beginning of file, give up
   1699   return (line, 0, -1)
   1700 
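# Illustrative example of ReverseCloseExpression() spanning two lines (a rough
# sketch, using the same toy CleansedLines as for CloseExpression):
#   clean = CleansedLines(['', 'Foo(bar,', '    baz);'])
#   ReverseCloseExpression(clean, 2, 7)  ->  ('Foo(bar,', 1, 3)
#       starting from the ')' at (line 2, pos 7), the matching '(' is found
#       at (line 1, pos 3).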
   1701 
   1702 def CheckForCopyright(filename, lines, error):
   1703   """Logs an error if no Copyright message appears at the top of the file."""
   1704 
   1705   # We'll say it should occur by line 10. Don't forget there's a
   1706   # dummy line at the front.
   1707   for line in xrange(1, min(len(lines), 11)):
   1708     if re.search(r'Copyright', lines[line], re.I): break
   1709   else:                       # means no copyright line was found
   1710     error(filename, 0, 'legal/copyright', 5,
   1711           'No copyright message found.  '
   1712           'You should have a line: "Copyright [year] <Copyright Owner>"')
   1713 
   1714 
   1715 def GetIndentLevel(line):
   1716   """Return the number of leading spaces in line.
   1717 
   1718   Args:
   1719     line: A string to check.
   1720 
   1721   Returns:
   1722     An integer count of leading spaces, possibly zero.
   1723   """
   1724   indent = Match(r'^( *)\S', line)
   1725   if indent:
   1726     return len(indent.group(1))
   1727   else:
   1728     return 0
   1729 
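# Illustrative examples of GetIndentLevel() above:
#   GetIndentLevel('    int x;')  ->  4
#   GetIndentLevel('int x;')      ->  0
#   GetIndentLevel('\tint x;')    ->  0   # tabs are not counted as indentation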
   1730 
   1731 def GetHeaderGuardCPPVariable(filename):
   1732   """Returns the CPP variable that should be used as a header guard.
   1733 
   1734   Args:
   1735     filename: The name of a C++ header file.
   1736 
   1737   Returns:
   1738     The CPP variable that should be used as a header guard in the
   1739     named file.
   1740 
   1741   """
   1742 
   1743   # Restore the original filename in case cpplint is invoked from Emacs's
   1744   # flymake.
   1745   filename = re.sub(r'_flymake\.h$', '.h', filename)
   1746   filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
   1747   # Replace 'c++' with 'cpp'.
   1748   filename = filename.replace('C++', 'cpp').replace('c++', 'cpp')
   1749 
   1750   fileinfo = FileInfo(filename)
   1751   file_path_from_root = fileinfo.RepositoryName()
   1752   if _root:
   1753     suffix = os.sep
   1754     # On Windows, using the directory separator will leave us with a
   1755     # "bogus escape error" unless we properly escape it in the regex.
   1756     if suffix == '\\':
   1757       suffix += '\\'
   1758     file_path_from_root = re.sub('^' + _root + suffix, '', file_path_from_root)
   1759   return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_'
   1760 
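# Illustrative sketch of the final transformation in GetHeaderGuardCPPVariable()
# above (the exact prefix stripped beforehand depends on FileInfo.RepositoryName()
# and the --root flag):
#   re.sub(r'[^a-zA-Z0-9]', '_', 'chrome/browser/ui/browser.h').upper() + '_'
#       ->  'CHROME_BROWSER_UI_BROWSER_H_'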
   1761 
   1762 def CheckForHeaderGuard(filename, clean_lines, error):
   1763   """Checks that the file contains a header guard.
   1764 
   1765   Logs an error if no #ifndef header guard is present.  For headers
   1766   that do have a guard, checks that the full pathname is used.
   1767 
   1768   Args:
   1769     filename: The name of the C++ header file.
   1770     clean_lines: A CleansedLines instance containing the file.
   1771     error: The function to call with any errors found.
   1772   """
   1773 
   1774   # Don't check for header guards if there are error suppression
   1775   # comments somewhere in this file.
   1776   #
   1777   # Because this is silencing a warning for a nonexistent line, we
   1778   # only support the very specific NOLINT(build/header_guard) syntax,
   1779   # and not the general NOLINT or NOLINT(*) syntax.
   1780   raw_lines = clean_lines.lines_without_raw_strings
   1781   for i in raw_lines:
   1782     if Search(r'//\s*NOLINT\(build/header_guard\)', i):
   1783       return
   1784 
   1785   cppvar = GetHeaderGuardCPPVariable(filename)
   1786 
   1787   ifndef = ''
   1788   ifndef_linenum = 0
   1789   define = ''
   1790   endif = ''
   1791   endif_linenum = 0
   1792   for linenum, line in enumerate(raw_lines):
   1793     linesplit = line.split()
   1794     if len(linesplit) >= 2:
   1795       # find the first occurrence of #ifndef and #define, save arg
   1796       if not ifndef and linesplit[0] == '#ifndef':
   1797         # set ifndef to the header guard presented on the #ifndef line.
   1798         ifndef = linesplit[1]
   1799         ifndef_linenum = linenum
   1800       if not define and linesplit[0] == '#define':
   1801         define = linesplit[1]
   1802     # find the last occurrence of #endif, save entire line
   1803     if line.startswith('#endif'):
   1804       endif = line
   1805       endif_linenum = linenum
   1806 
   1807   if not ifndef or not define or ifndef != define:
   1808     error(filename, 0, 'build/header_guard', 5,
   1809           'No #ifndef header guard found, suggested CPP variable is: %s' %
   1810           cppvar)
   1811     return
   1812 
   1813   # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
   1814   # for backward compatibility.
   1815   if ifndef != cppvar:
   1816     error_level = 0
   1817     if ifndef != cppvar + '_':
   1818       error_level = 5
   1819 
   1820     ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum,
   1821                             error)
   1822     error(filename, ifndef_linenum, 'build/header_guard', error_level,
   1823           '#ifndef header guard has wrong style, please use: %s' % cppvar)
   1824 
   1825   # Check for "//" comments on endif line.
   1826   ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum,
   1827                           error)
   1828   match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif)
   1829   if match:
   1830     if match.group(1) == '_':
   1831       # Issue low severity warning for deprecated double trailing underscore
   1832       error(filename, endif_linenum, 'build/header_guard', 0,
   1833             '#endif line should be "#endif  // %s"' % cppvar)
   1834     return
   1835 
   1836   # Didn't find the corresponding "//" comment.  If this file does not
   1837   # contain any "//" comments at all, it could be that the compiler
   1838   # only wants "/**/" comments; look for those instead.
   1839   no_single_line_comments = True
   1840   for i in xrange(1, len(raw_lines) - 1):
   1841     line = raw_lines[i]
   1842     if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line):
   1843       no_single_line_comments = False
   1844       break
   1845 
   1846   if no_single_line_comments:
   1847     match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif)
   1848     if match:
   1849       if match.group(1) == '_':
   1850         # Low severity warning for double trailing underscore
   1851         error(filename, endif_linenum, 'build/header_guard', 0,
   1852               '#endif line should be "#endif  /* %s */"' % cppvar)
   1853       return
   1854 
   1855   # Didn't find anything
   1856   error(filename, endif_linenum, 'build/header_guard', 5,
   1857         '#endif line should be "#endif  // %s"' % cppvar)
   1858 
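# Illustrative sketch of a guard that satisfies CheckForHeaderGuard() above,
# assuming the computed guard variable is CHROME_BROWSER_UI_BROWSER_H_:
#   #ifndef CHROME_BROWSER_UI_BROWSER_H_
#   #define CHROME_BROWSER_UI_BROWSER_H_
#   ...
#   #endif  // CHROME_BROWSER_UI_BROWSER_H_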
   1859 
   1860 def CheckHeaderFileIncluded(filename, include_state, error):
   1861   """Logs an error if a .cc file does not include its header."""
   1862 
   1863   # Do not check test files
   1864   fileinfo = FileInfo(filename)
   1865   if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()):
   1866     return
   1867 
   1868   headerfile = filename[0:len(filename) - len(fileinfo.Extension())] + '.h'
   1869   if not os.path.exists(headerfile):
   1870     return
   1871   headername = FileInfo(headerfile).RepositoryName()
   1872   first_include = 0
   1873   for section_list in include_state.include_list:
   1874     for f in section_list:
   1875       if headername in f[0] or f[0] in headername:
   1876         return
   1877       if not first_include:
   1878         first_include = f[1]
   1879 
   1880   error(filename, first_include, 'build/include', 5,
   1881         '%s should include its header file %s' % (fileinfo.RepositoryName(),
   1882                                                   headername))
   1883 
   1884 
   1885 def CheckForBadCharacters(filename, lines, error):
   1886   """Logs an error for each line containing bad characters.
   1887 
   1888   Two kinds of bad characters:
   1889 
   1890   1. Unicode replacement characters: These indicate that either the file
   1891   contained invalid UTF-8 (likely) or Unicode replacement characters (which
   1892   it shouldn't).  Note that it's possible for this to throw off line
   1893   numbering if the invalid UTF-8 occurred adjacent to a newline.
   1894 
   1895   2. NUL bytes.  These are problematic for some tools.
   1896 
   1897   Args:
   1898     filename: The name of the current file.
   1899     lines: An array of strings, each representing a line of the file.
   1900     error: The function to call with any errors found.
   1901   """
   1902   for linenum, line in enumerate(lines):
   1903     if u'\ufffd' in line:
   1904       error(filename, linenum, 'readability/utf8', 5,
   1905             'Line contains invalid UTF-8 (or Unicode replacement character).')
   1906     if '\0' in line:
   1907       error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.')
   1908 
   1909 
   1910 def CheckForNewlineAtEOF(filename, lines, error):
   1911   """Logs an error if there is no newline char at the end of the file.
   1912 
   1913   Args:
   1914     filename: The name of the current file.
   1915     lines: An array of strings, each representing a line of the file.
   1916     error: The function to call with any errors found.
   1917   """
   1918 
   1919   # The array lines() was created by adding two newlines to the
   1920   # original file (go figure), then splitting on \n.
   1921   # To verify that the file ends in \n, we just have to make sure the
   1922   # last-but-two element of lines() exists and is empty.
   1923   if len(lines) < 3 or lines[-2]:
   1924     error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
   1925           'Could not find a newline character at the end of the file.')
   1926 
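# Illustrative sketch of the split behavior CheckForNewlineAtEOF() relies on:
#   'int x;\n'.split('\n')  ->  ['int x;', '']
# A file that ends with a newline leaves a trailing empty element after the
# split, which is what the emptiness test above is looking for.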
   1927 
   1928 def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
   1929   """Logs an error if we see /* ... */ or "..." that extend past one line.
   1930 
   1931   /* ... */ comments are legit inside macros, for one line.
   1932   Otherwise, we prefer // comments, so it's ok to warn about the
   1933   other.  Likewise, it's ok for strings to extend across multiple
   1934   lines, as long as a line continuation character (backslash)
   1935   terminates each line. Although not currently prohibited by the C++
   1936   style guide, it's ugly and unnecessary. We don't do well with either
   1937   in this lint program, so we warn about both.
   1938 
   1939   Args:
   1940     filename: The name of the current file.
   1941     clean_lines: A CleansedLines instance containing the file.
   1942     linenum: The number of the line to check.
   1943     error: The function to call with any errors found.
   1944   """
   1945   line = clean_lines.elided[linenum]
   1946 
   1947   # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1948   # second (escaped) slash may trigger later \" detection erroneously.
   1949   line = line.replace('\\\\', '')
   1950 
   1951   if line.count('/*') > line.count('*/'):
   1952     error(filename, linenum, 'readability/multiline_comment', 5,
   1953           'Complex multi-line /*...*/-style comment found. '
   1954           'Lint may give bogus warnings.  '
   1955           'Consider replacing these with //-style comments, '
   1956           'with #if 0...#endif, '
   1957           'or with more clearly structured multi-line comments.')
   1958 
   1959   if (line.count('"') - line.count('\\"')) % 2:
   1960     error(filename, linenum, 'readability/multiline_string', 5,
   1961           'Multi-line string ("...") found.  This lint script doesn\'t '
   1962           'do well with such strings, and may give bogus warnings.  '
   1963           'Use C++11 raw strings or concatenation instead.')
   1964 
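# Illustrative sketch of the quote-parity test in the function above:
#   line = 'std::string s = "starts here and'   # string continues on next line
#   (line.count('"') - line.count('\\"')) % 2   ->  1, so the warning fires
# Escaped backslashes are stripped beforehand, so \" occurrences really are
# escaped quotes and are subtracted from the count.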
   1965 
   1966 # (non-threadsafe name, thread-safe alternative, validation pattern)
   1967 #
   1968 # The validation pattern is used to eliminate false positives such as:
   1969 #  _rand();               // false positive due to substring match.
   1970 #  ->rand();              // some member function rand().
   1971 #  ACMRandom rand(seed);  // some variable named rand.
   1972 #  ISAACRandom rand();    // another variable named rand.
   1973 #
   1974 # Basically we require the return value of these functions to be used
   1975 # in some expression context on the same line by matching on some
   1976 # operator before the function name.  This eliminates constructors and
   1977 # member function calls.
   1978 _UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)'
   1979 _THREADING_LIST = (
   1980     ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'),
   1981     ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'),
   1982     ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'),
   1983     ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'),
   1984     ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'),
   1985     ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'),
   1986     ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'),
   1987     ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'),
   1988     ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'),
   1989     ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'),
   1990     ('strtok(', 'strtok_r(',
   1991      _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'),
   1992     ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'),
   1993     )
   1994 
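# Illustrative sketch of how the validation patterns above behave:
#   Search(_UNSAFE_FUNC_PREFIX + r'rand\(\)', 'int r = rand();')       -> match
#   Search(_UNSAFE_FUNC_PREFIX + r'rand\(\)', 'ACMRandom rand(seed);') -> None
# The first line uses the return value of rand() in an expression, so the
# thread-unsafe warning fires; the second is just a variable named rand.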
   1995 
   1996 def CheckPosixThreading(filename, clean_lines, linenum, error):
   1997   """Checks for calls to thread-unsafe functions.
   1998 
   1999   Much code was originally written without consideration for
   2000   multi-threading. Also, engineers often rely on their old experience;
   2001   they learned POSIX before the threading extensions were added. These
   2002   checks guide engineers to use the thread-safe variants (when using
   2003   POSIX directly).
   2004 
   2005   Args:
   2006     filename: The name of the current file.
   2007     clean_lines: A CleansedLines instance containing the file.
   2008     linenum: The number of the line to check.
   2009     error: The function to call with any errors found.
   2010   """
   2011   line = clean_lines.elided[linenum]
   2012   for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST:
   2013     # Additional pattern matching check to confirm that this is the
   2014     # function we are looking for
   2015     if Search(pattern, line):
   2016       error(filename, linenum, 'runtime/threadsafe_fn', 2,
   2017             'Consider using ' + multithread_safe_func +
   2018             '...) instead of ' + single_thread_func +
   2019             '...) for improved thread safety.')
   2020 
   2021 
   2022 def CheckVlogArguments(filename, clean_lines, linenum, error):
   2023   """Checks that VLOG() is only used for defining a logging level.
   2024 
   2025   For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and
   2026   VLOG(FATAL) are not.
   2027 
   2028   Args:
   2029     filename: The name of the current file.
   2030     clean_lines: A CleansedLines instance containing the file.
   2031     linenum: The number of the line to check.
   2032     error: The function to call with any errors found.
   2033   """
   2034   line = clean_lines.elided[linenum]
   2035   if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line):
   2036     error(filename, linenum, 'runtime/vlog', 5,
   2037           'VLOG() should be used with numeric verbosity level.  '
   2038           'Use LOG() if you want symbolic severity levels.')
   2039 
   2040 # Matches an invalid increment: *count++, which moves the pointer instead of
   2041 # incrementing the value it points to.
   2042 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   2043     r'^\s*\*\w+(\+\+|--);')
   2044 
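# Illustrative sketch of what the pattern above flags:
#   '  *count++;'     ->  match (increments the pointer, not the value)
#   '  (*count)++;'   ->  no match
#   '  *count += 1;'  ->  no match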
   2045 
   2046 def CheckInvalidIncrement(filename, clean_lines, linenum, error):
   2047   """Checks for invalid increment *count++.
   2048 
   2049   For example, the following function:
   2050   void increment_counter(int* count) {
   2051     *count++;
   2052   }
   2053   is invalid, because it effectively does count++, moving the pointer, and
   2054   should be replaced with ++*count, (*count)++ or *count += 1.
   2055 
   2056   Args:
   2057     filename: The name of the current file.
   2058     clean_lines: A CleansedLines instance containing the file.
   2059     linenum: The number of the line to check.
   2060     error: The function to call with any errors found.
   2061   """
   2062   line = clean_lines.elided[linenum]
   2063   if _RE_PATTERN_INVALID_INCREMENT.match(line):
   2064     error(filename, linenum, 'runtime/invalid_increment', 5,
   2065           'Changing pointer instead of value (or unused value of operator*).')
   2066 
   2067 
   2068 def IsMacroDefinition(clean_lines, linenum):
   2069   if Search(r'^#define', clean_lines[linenum]):
   2070     return True
   2071 
   2072   if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]):
   2073     return True
   2074 
   2075   return False
   2076 
   2077 
   2078 def IsForwardClassDeclaration(clean_lines, linenum):
   2079   return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum])
   2080 
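# Illustrative sketch of IsForwardClassDeclaration() above:
#   IsForwardClassDeclaration(['class Foo;'], 0)                        -> match
#   IsForwardClassDeclaration(['class Foo {'], 0)                       -> None
#   IsForwardClassDeclaration(['template <typename T> class Foo;'], 0)  -> match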
   2081 
   2082 class _BlockInfo(object):
   2083   """Stores information about a generic block of code."""
   2084 
   2085   def __init__(self, linenum, seen_open_brace):
   2086     self.starting_linenum = linenum
   2087     self.seen_open_brace = seen_open_brace
   2088     self.open_parentheses = 0
   2089     self.inline_asm = _NO_ASM
   2090     self.check_namespace_indentation = False
   2091 
   2092   def CheckBegin(self, filename, clean_lines, linenum, error):
   2093     """Run checks that apply to the text up to the opening brace.
   2094 
   2095     This is mostly for checking the text after the class identifier
   2096     and the "{", usually where the base class is specified.  For other
   2097     blocks, there isn't much to check, so we always pass.
   2098 
   2099     Args:
   2100       filename: The name of the current file.
   2101       clean_lines: A CleansedLines instance containing the file.
   2102       linenum: The number of the line to check.
   2103       error: The function to call with any errors found.
   2104     """
   2105     pass
   2106 
   2107   def CheckEnd(self, filename, clean_lines, linenum, error):
   2108     """Run checks that apply to the text after the closing brace.
   2109 
   2110     This is mostly used for checking end of namespace comments.
   2111 
   2112     Args:
   2113       filename: The name of the current file.
   2114       clean_lines: A CleansedLines instance containing the file.
   2115       linenum: The number of the line to check.
   2116       error: The function to call with any errors found.
   2117     """
   2118     pass
   2119 
   2120   def IsBlockInfo(self):
   2121     """Returns true if this block is a _BlockInfo.
   2122 
   2123     This is convenient for verifying that an object is an instance of
   2124     a _BlockInfo, but not an instance of any of the derived classes.
   2125 
   2126     Returns:
   2127       True for this class, False for derived classes.
   2128     """
   2129     return self.__class__ == _BlockInfo
   2130 
   2131 
   2132 class _ExternCInfo(_BlockInfo):
   2133   """Stores information about an 'extern "C"' block."""
   2134 
   2135   def __init__(self, linenum):
   2136     _BlockInfo.__init__(self, linenum, True)
   2137 
   2138 
   2139 class _ClassInfo(_BlockInfo):
   2140   """Stores information about a class."""
   2141 
   2142   def __init__(self, name, class_or_struct, clean_lines, linenum):
   2143     _BlockInfo.__init__(self, linenum, False)
   2144     self.name = name
   2145     self.is_derived = False
   2146     self.check_namespace_indentation = True
   2147     if class_or_struct == 'struct':
   2148       self.access = 'public'
   2149       self.is_struct = True
   2150     else:
   2151       self.access = 'private'
   2152       self.is_struct = False
   2153 
   2154     # Remember initial indentation level for this class.  Using raw_lines here
   2155     # instead of elided to account for leading comments.
   2156     self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum])
   2157 
   2158     # Try to find the end of the class.  This will be confused by things like:
   2159     #   class A {
   2160     #   } *x = { ...
   2161     #
   2162     # But it's still good enough for CheckSectionSpacing.
   2163     self.last_line = 0
   2164     depth = 0
   2165     for i in range(linenum, clean_lines.NumLines()):
   2166       line = clean_lines.elided[i]
   2167       depth += line.count('{') - line.count('}')
   2168       if not depth:
   2169         self.last_line = i
   2170         break
   2171 
   2172   def CheckBegin(self, filename, clean_lines, linenum, error):
   2173     # Look for a bare ':'
   2174     if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
   2175       self.is_derived = True
   2176 
   2177   def CheckEnd(self, filename, clean_lines, linenum, error):
   2178     # If there is a DISALLOW macro, it should appear near the end of
   2179     # the class.
   2180     seen_last_thing_in_class = False
   2181     for i in xrange(linenum - 1, self.starting_linenum, -1):
   2182       match = Search(
   2183           r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' +
   2184           self.name + r'\)',
   2185           clean_lines.elided[i])
   2186       if match:
   2187         if seen_last_thing_in_class:
   2188           error(filename, i, 'readability/constructors', 3,
   2189                 match.group(1) + ' should be the last thing in the class')
   2190         break
   2191 
   2192       if not Match(r'^\s*$', clean_lines.elided[i]):
   2193         seen_last_thing_in_class = True
   2194 
   2195     # Check that closing brace is aligned with beginning of the class.
   2196     # Only do this if the closing brace is indented by only whitespaces.
   2197     # This means we will not check single-line class definitions.
   2198     indent = Match(r'^( *)\}', clean_lines.elided[linenum])
   2199     if indent and len(indent.group(1)) != self.class_indent:
   2200       if self.is_struct:
   2201         parent = 'struct ' + self.name
   2202       else:
   2203         parent = 'class ' + self.name
   2204       error(filename, linenum, 'whitespace/indent', 3,
   2205             'Closing brace should be aligned with beginning of %s' % parent)
   2206 
   2207 
   2208 class _NamespaceInfo(_BlockInfo):
   2209   """Stores information about a namespace."""
   2210 
   2211   def __init__(self, name, linenum):
   2212     _BlockInfo.__init__(self, linenum, False)
   2213     self.name = name or ''
   2214     self.check_namespace_indentation = True
   2215 
   2216   def CheckEnd(self, filename, clean_lines, linenum, error):
   2217     """Check end of namespace comments."""
   2218     line = clean_lines.raw_lines[linenum]
   2219 
   2220     # Check how many lines are enclosed in this namespace.  Don't issue a
   2221     # warning for missing namespace comments if there aren't enough
   2222     # lines.  However, do apply checks if there is already an end of
   2223     # namespace comment and it's incorrect.
   2224     #
   2225     # TODO(unknown): We always want to check end of namespace comments
   2226     # if a namespace is large, but sometimes we also want to apply the
   2227     # check if a short namespace contained nontrivial things (something
   2228     # other than forward declarations).  There is currently no logic on
   2229     # deciding what these nontrivial things are, so this check is
   2230     # triggered by namespace size only, which works most of the time.
   2231     if (linenum - self.starting_linenum < 10
   2232         and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)):
   2233       return
   2234 
   2235     # Look for matching comment at end of namespace.
   2236     #
   2237     # Note that we accept C style "/* */" comments for terminating
   2238     # namespaces, so that code that terminates namespaces inside
   2239     # preprocessor macros can be cpplint clean.
   2240     #
   2241     # We also accept stuff like "// end of namespace <name>." with the
   2242     # period at the end.
   2243     #
   2244     # Besides these, we don't accept anything else, otherwise we might
   2245     # get false negatives when the existing comment is a substring of the
   2246     # expected namespace.
   2247     if self.name:
   2248       # Named namespace
   2249       if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' +
   2250                     re.escape(self.name) + r'[\*/\.\\\s]*$'),
   2251                    line):
   2252         error(filename, linenum, 'readability/namespace', 5,
   2253               'Namespace should be terminated with "// namespace %s"' %
   2254               self.name)
   2255     else:
   2256       # Anonymous namespace
   2257       if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
   2258         # If "// namespace anonymous" or "// anonymous namespace (more text)",
   2259         # mention "// anonymous namespace" as an acceptable form
   2260         if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line):
   2261           error(filename, linenum, 'readability/namespace', 5,
   2262                 'Anonymous namespace should be terminated with "// namespace"'
   2263                 ' or "// anonymous namespace"')
   2264         else:
   2265           error(filename, linenum, 'readability/namespace', 5,
   2266                 'Anonymous namespace should be terminated with "// namespace"')
   2267 
   2268 
   2269 class _PreprocessorInfo(object):
   2270   """Stores checkpoints of nesting stacks when #if/#else is seen."""
   2271 
   2272   def __init__(self, stack_before_if):
   2273     # The entire nesting stack before #if
   2274     self.stack_before_if = stack_before_if
   2275 
   2276     # The entire nesting stack up to #else
   2277     self.stack_before_else = []
   2278 
   2279     # Whether we have already seen #else or #elif
   2280     self.seen_else = False
   2281 
   2282 
   2283 class NestingState(object):
   2284   """Holds states related to parsing braces."""
   2285 
   2286   def __init__(self):
   2287     # Stack for tracking all braces.  An object is pushed whenever we
   2288     # see a "{", and popped when we see a "}".  Only 3 types of
   2289     # objects are possible:
   2290     # - _ClassInfo: a class or struct.
   2291     # - _NamespaceInfo: a namespace.
   2292     # - _BlockInfo: some other type of block.
   2293     self.stack = []
   2294 
   2295     # Top of the previous stack before each Update().
   2296     #
   2297     # had to do some convoluted checks to find out what the current scope
   2298     # is at the beginning of the line.  This check is simplified by
   2299     # saving the previous top of the nesting stack.
   2300     # saving the previous top of nesting stack.
   2301     #
   2302     # We could save the full stack, but we only need the top.  Copying
   2303     # the full nesting stack would slow down cpplint by ~10%.
   2304     self.previous_stack_top = []
   2305 
   2306     # Stack of _PreprocessorInfo objects.
   2307     self.pp_stack = []
   2308 
   2309   def SeenOpenBrace(self):
   2310     """Check if we have seen the opening brace for the innermost block.
   2311 
   2312     Returns:
   2313       True if we have seen the opening brace, False if the innermost
   2314       block is still expecting an opening brace.
   2315     """
   2316     return (not self.stack) or self.stack[-1].seen_open_brace
   2317 
   2318   def InNamespaceBody(self):
   2319     """Check if we are currently one level inside a namespace body.
   2320 
   2321     Returns:
   2322       True if top of the stack is a namespace block, False otherwise.
   2323     """
   2324     return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
   2325 
   2326   def InExternC(self):
   2327     """Check if we are currently one level inside an 'extern "C"' block.
   2328 
   2329     Returns:
   2330       True if top of the stack is an extern block, False otherwise.
   2331     """
   2332     return self.stack and isinstance(self.stack[-1], _ExternCInfo)
   2333 
   2334   def InClassDeclaration(self):
   2335     """Check if we are currently one level inside a class or struct declaration.
   2336 
   2337     Returns:
   2338       True if top of the stack is a class/struct, False otherwise.
   2339     """
   2340     return self.stack and isinstance(self.stack[-1], _ClassInfo)
   2341 
   2342   def InAsmBlock(self):
   2343     """Check if we are currently one level inside an inline ASM block.
   2344 
   2345     Returns:
   2346       True if the top of the stack is a block containing inline ASM.
   2347     """
   2348     return self.stack and self.stack[-1].inline_asm != _NO_ASM
   2349 
   2350   def InTemplateArgumentList(self, clean_lines, linenum, pos):
   2351     """Check if current position is inside template argument list.
   2352 
   2353     Args:
   2354       clean_lines: A CleansedLines instance containing the file.
   2355       linenum: The number of the line to check.
   2356       pos: position just after the suspected template argument.
   2357     Returns:
   2358       True if (linenum, pos) is inside template arguments.
   2359     """
   2360     while linenum < clean_lines.NumLines():
   2361       # Find the earliest character that might indicate a template argument
   2362       line = clean_lines.elided[linenum]
   2363       match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
   2364       if not match:
   2365         linenum += 1
   2366         pos = 0
   2367         continue
   2368       token = match.group(1)
   2369       pos += len(match.group(0))
   2370 
   2371       # These things do not look like a template argument list:
   2372       #   class Suspect {
   2373       #   class Suspect x; }
   2374       if token in ('{', '}', ';'): return False
   2375 
   2376       # These things look like a template argument list:
   2377       #   template <class Suspect>
   2378       #   template <class Suspect = default_value>
   2379       #   template <class Suspect[]>
   2380       #   template <class Suspect...>
   2381       if token in ('>', '=', '[', ']', '.'): return True
   2382 
   2383       # Check if token is an unmatched '<'.
   2384       # If not, move on to the next character.
   2385       if token != '<':
   2386         pos += 1
   2387         if pos >= len(line):
   2388           linenum += 1
   2389           pos = 0
   2390         continue
   2391 
   2392       # We can't be sure when we only find a single '<', so we need to
   2393       # find the matching '>'.
   2394       (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
   2395       if end_pos < 0:
   2396         # Not sure if template argument list or syntax error in file
   2397         return False
   2398       linenum = end_line
   2399       pos = end_pos
   2400     return False
   2401 
   2402   def UpdatePreprocessor(self, line):
   2403     """Update preprocessor stack.
   2404 
   2405     We need to handle preprocessors due to classes like this:
   2406       #ifdef SWIG
   2407       struct ResultDetailsPageElementExtensionPoint {
   2408       #else
   2409       struct ResultDetailsPageElementExtensionPoint : public Extension {
   2410       #endif
   2411 
   2412     We make the following assumptions (good enough for most files):
   2413     - Preprocessor condition evaluates to true from #if up to first
   2414       #else/#elif/#endif.
   2415 
   2416     - Preprocessor condition evaluates to false from #else/#elif up
   2417       to #endif.  We still perform lint checks on these lines, but
   2418       these do not affect nesting stack.
   2419 
   2420     Args:
   2421       line: current line to check.
   2422     """
   2423     if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
   2424       # Beginning of #if block, save the nesting stack here.  The saved
   2425       # stack will allow us to restore the parsing state in the #else case.
   2426       self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
   2427     elif Match(r'^\s*#\s*(else|elif)\b', line):
   2428       # Beginning of #else block
   2429       if self.pp_stack:
   2430         if not self.pp_stack[-1].seen_else:
   2431           # This is the first #else or #elif block.  Remember the
   2432           # whole nesting stack up to this point.  This is what we
   2433           # keep after the #endif.
   2434           self.pp_stack[-1].seen_else = True
   2435           self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
   2436 
   2437         # Restore the stack to how it was before the #if
   2438         self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
   2439       else:
   2440         # TODO(unknown): unexpected #else, issue warning?
   2441         pass
   2442     elif Match(r'^\s*#\s*endif\b', line):
   2443       # End of #if or #else blocks.
   2444       if self.pp_stack:
   2445         # If we saw an #else, we will need to restore the nesting
   2446         # stack to its former state before the #else, otherwise we
   2447         # will just continue from where we left off.
   2448         if self.pp_stack[-1].seen_else:
   2449           # Here we can just use a shallow copy since we are the last
   2450           # reference to it.
   2451           self.stack = self.pp_stack[-1].stack_before_else
   2452         # Drop the corresponding #if
   2453         self.pp_stack.pop()
   2454       else:
   2455         # TODO(unknown): unexpected #endif, issue warning?
   2456         pass
   2457 
   2458   # TODO(unknown): Update() is too long, but we will refactor later.
   2459   def Update(self, filename, clean_lines, linenum, error):
   2460     """Update nesting state with current line.
   2461 
   2462     Args:
   2463       filename: The name of the current file.
   2464       clean_lines: A CleansedLines instance containing the file.
   2465       linenum: The number of the line to check.
   2466       error: The function to call with any errors found.
   2467     """
   2468     line = clean_lines.elided[linenum]
   2469 
   2470     # Remember top of the previous nesting stack.
   2471     #
   2472     # The stack is always pushed/popped and not modified in place, so
   2473     # we can just do a shallow copy instead of copy.deepcopy.  Using
   2474     # deepcopy would slow down cpplint by ~28%.
   2475     if self.stack:
   2476       self.previous_stack_top = self.stack[-1]
   2477     else:
   2478       self.previous_stack_top = None
   2479 
   2480     # Update pp_stack
   2481     self.UpdatePreprocessor(line)
   2482 
   2483     # Count parentheses.  This is to avoid adding struct arguments to
   2484     # the nesting stack.
   2485     if self.stack:
   2486       inner_block = self.stack[-1]
   2487       depth_change = line.count('(') - line.count(')')
   2488       inner_block.open_parentheses += depth_change
   2489 
   2490       # Also check if we are starting or ending an inline assembly block.
   2491       if inner_block.inline_asm in (_NO_ASM, _END_ASM):
   2492         if (depth_change != 0 and
   2493             inner_block.open_parentheses == 1 and
   2494             _MATCH_ASM.match(line)):
   2495           # Enter assembly block
   2496           inner_block.inline_asm = _INSIDE_ASM
   2497         else:
   2498           # Not entering assembly block.  If previous line was _END_ASM,
   2499           # we will now shift to _NO_ASM state.
   2500           inner_block.inline_asm = _NO_ASM
   2501       elif (inner_block.inline_asm == _INSIDE_ASM and
   2502             inner_block.open_parentheses == 0):
   2503         # Exit assembly block
   2504         inner_block.inline_asm = _END_ASM
   2505 
   2506     # Consume namespace declaration at the beginning of the line.  Do
   2507     # this in a loop so that we catch same line declarations like this:
   2508     #   namespace proto2 { namespace bridge { class MessageSet; } }
   2509     while True:
   2510       # Match start of namespace.  The "\b\s*" below catches namespace
   2511       # declarations even if they aren't followed by whitespace; this
   2512       # is so that we don't confuse our namespace checker.  The
   2513       # missing spaces will be flagged by CheckSpacing.
   2514       namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
   2515       if not namespace_decl_match:
   2516         break
   2517 
   2518       new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
   2519       self.stack.append(new_namespace)
   2520 
   2521       line = namespace_decl_match.group(2)
   2522       if line.find('{') != -1:
   2523         new_namespace.seen_open_brace = True
   2524         line = line[line.find('{') + 1:]
   2525 
   2526     # Look for a class declaration in whatever is left of the line
   2527     # after parsing namespaces.  The regexp accounts for decorated classes
   2528     # such as in:
   2529     #   class LOCKABLE API Object {
   2530     #   };
   2531     class_decl_match = Match(
   2532         r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
   2533         r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
   2534         r'(.*)$', line)
   2535     if (class_decl_match and
   2536         (not self.stack or self.stack[-1].open_parentheses == 0)):
   2537       # We do not want to accept classes that are actually template arguments:
   2538       #   template <class Ignore1,
   2539       #             class Ignore2 = Default<Args>,
   2540       #             template <Args> class Ignore3>
   2541       #   void Function() {};
   2542       #
   2543       # To avoid template argument cases, we scan forward and look for
   2544       # an unmatched '>'.  If we see one, assume we are inside a
   2545       # template argument list.
   2546       end_declaration = len(class_decl_match.group(1))
   2547       if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
   2548         self.stack.append(_ClassInfo(
   2549             class_decl_match.group(3), class_decl_match.group(2),
   2550             clean_lines, linenum))
   2551         line = class_decl_match.group(4)
   2552 
   2553     # If we have not yet seen the opening brace for the innermost block,
   2554     # run checks here.
   2555     if not self.SeenOpenBrace():
   2556       self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
   2557 
   2558     # Update access control if we are inside a class/struct
   2559     if self.stack and isinstance(self.stack[-1], _ClassInfo):
   2560       classinfo = self.stack[-1]
   2561       access_match = Match(
   2562           r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
   2563           r':(?:[^:]|$)',
   2564           line)
   2565       if access_match:
   2566         classinfo.access = access_match.group(2)
   2567 
   2568         # Check that access keywords are indented +1 space.  Skip this
   2569         # check if the keywords are not preceded by whitespaces.
   2570         indent = access_match.group(1)
   2571         if (len(indent) != classinfo.class_indent + 1 and
   2572             Match(r'^\s*$', indent)):
   2573           if classinfo.is_struct:
   2574             parent = 'struct ' + classinfo.name
   2575           else:
   2576             parent = 'class ' + classinfo.name
   2577           slots = ''
   2578           if access_match.group(3):
   2579             slots = access_match.group(3)
   2580           error(filename, linenum, 'whitespace/indent', 3,
   2581                 '%s%s: should be indented +1 space inside %s' % (
   2582                     access_match.group(2), slots, parent))
   2583 
   2584     # Consume braces or semicolons from what's left of the line
   2585     while True:
   2586       # Match first brace, semicolon, or closed parenthesis.
   2587       matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
   2588       if not matched:
   2589         break
   2590 
   2591       token = matched.group(1)
   2592       if token == '{':
   2593         # If the namespace or class hasn't seen an opening brace yet, mark
   2594         # namespace/class head as complete.  Push a new block onto the
   2595         # stack otherwise.
   2596         if not self.SeenOpenBrace():
   2597           self.stack[-1].seen_open_brace = True
   2598         elif Match(r'^extern\s*"[^"]*"\s*\{', line):
   2599           self.stack.append(_ExternCInfo(linenum))
   2600         else:
   2601           self.stack.append(_BlockInfo(linenum, True))
   2602           if _MATCH_ASM.match(line):
   2603             self.stack[-1].inline_asm = _BLOCK_ASM
   2604 
   2605       elif token == ';' or token == ')':
   2606         # If we haven't seen an opening brace yet, but we already saw
   2607         # a semicolon, this is probably a forward declaration.  Pop
   2608         # the stack for these.
   2609         #
   2610         # Similarly, if we haven't seen an opening brace yet, but we
   2611         # already saw a closing parenthesis, then these are probably
   2612         # function arguments with extra "class" or "struct" keywords.
   2613         # Also pop the stack for these.
   2614         if not self.SeenOpenBrace():
   2615           self.stack.pop()
   2616       else:  # token == '}'
   2617         # Perform end of block checks and pop the stack.
   2618         if self.stack:
   2619           self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
   2620           self.stack.pop()
   2621       line = matched.group(2)
   2622 
   2623   def InnermostClass(self):
   2624     """Get class info on the top of the stack.
   2625 
   2626     Returns:
   2627       A _ClassInfo object if we are inside a class, or None otherwise.
   2628     """
   2629     for i in range(len(self.stack), 0, -1):
   2630       classinfo = self.stack[i - 1]
   2631       if isinstance(classinfo, _ClassInfo):
   2632         return classinfo
   2633     return None
   2634 
   2635   def CheckCompletedBlocks(self, filename, error):
   2636     """Checks that all classes and namespaces have been completely parsed.
   2637 
   2638     Call this when all lines in a file have been processed.
   2639     Args:
   2640       filename: The name of the current file.
   2641       error: The function to call with any errors found.
   2642     """
   2643     # Note: This test can result in false positives if #ifdef constructs
   2644     # get in the way of brace matching. See the testBuildClass test in
   2645     # cpplint_unittest.py for an example of this.
   2646     for obj in self.stack:
   2647       if isinstance(obj, _ClassInfo):
   2648         error(filename, obj.starting_linenum, 'build/class', 5,
   2649               'Failed to find complete declaration of class %s' %
   2650               obj.name)
   2651       elif isinstance(obj, _NamespaceInfo):
   2652         error(filename, obj.starting_linenum, 'build/namespaces', 5,
   2653               'Failed to find complete declaration of namespace %s' %
   2654               obj.name)
   2655 
   2656 
   2657 def CheckForNonStandardConstructs(filename, clean_lines, linenum,
   2658                                   nesting_state, error):
   2659   r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   2660 
   2661   Complain about several constructs which gcc-2 accepts, but which are
   2662   not standard C++.  Warning about these in lint is one way to ease the
   2663   transition to new compilers.
   2664   - put storage class first (e.g. "static const" instead of "const static").
    2665   - "%lld" instead of "%qd" in printf-type functions.
   2666   - "%1$d" is non-standard in printf-type functions.
   2667   - "\%" is an undefined character escape sequence.
   2668   - text after #endif is not allowed.
   2669   - invalid inner-style forward declaration.
   2670   - >? and <? operators, and their >?= and <?= cousins.
   2671 
   2672   Additionally, check for constructor/destructor style violations and reference
   2673   members, as it is very convenient to do so while checking for
   2674   gcc-2 compliance.
   2675 
   2676   Args:
   2677     filename: The name of the current file.
   2678     clean_lines: A CleansedLines instance containing the file.
   2679     linenum: The number of the line to check.
   2680     nesting_state: A NestingState instance which maintains information about
   2681                    the current stack of nested blocks being parsed.
   2682     error: A callable to which errors are reported, which takes 4 arguments:
   2683            filename, line number, error level, and message
   2684   """
   2685 
   2686   # Remove comments from the line, but leave in strings for now.
   2687   line = clean_lines.lines[linenum]
   2688 
   2689   if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   2690     error(filename, linenum, 'runtime/printf_format', 3,
   2691           '%q in format strings is deprecated.  Use %ll instead.')
   2692 
   2693   if Search(r'printf\s*\(.*".*%\d+\$', line):
   2694     error(filename, linenum, 'runtime/printf_format', 2,
   2695           '%N$ formats are unconventional.  Try rewriting to avoid them.')
   2696 
   2697   # Remove escaped backslashes before looking for undefined escapes.
   2698   line = line.replace('\\\\', '')
   2699 
   2700   if Search(r'("|\').*\\(%|\[|\(|{)', line):
   2701     error(filename, linenum, 'build/printf_format', 3,
   2702           '%, [, (, and { are undefined character escapes.  Unescape them.')
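           # Illustrative examples (not from the original source) of lines that the
           # three checks above would flag:
           #   printf("%qd\n", value);   // %q is deprecated, use %lld
           #   printf("%1$d\n", value);  // %N$ formats are unconventional
           #   printf("\%d\n", value);   // \% is an undefined escape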
   2703 
   2704   # For the rest, work with both comments and strings removed.
   2705   line = clean_lines.elided[linenum]
   2706 
   2707   if Search(r'\b(const|volatile|void|char|short|int|long'
   2708             r'|float|double|signed|unsigned'
   2709             r'|schar|u?int8|u?int16|u?int32|u?int64)'
   2710             r'\s+(register|static|extern|typedef)\b',
   2711             line):
   2712     error(filename, linenum, 'build/storage_class', 5,
   2713           'Storage-class specifier (static, extern, typedef, etc) should be '
   2714           'at the beginning of the declaration.')
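           # For example (illustrative only), "int static kMaxSize = 8;" would be
           # flagged here, while "static int kMaxSize = 8;" would not.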
   2715 
   2716   if Match(r'\s*#\s*endif\s*[^/\s]+', line):
   2717     error(filename, linenum, 'build/endif_comment', 5,
   2718           'Uncommented text after #endif is non-standard.  Use a comment.')
   2719 
   2720   if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   2721     error(filename, linenum, 'build/forward_decl', 5,
   2722           'Inner-style forward declarations are invalid.  Remove this line.')
   2723 
   2724   if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
   2725             line):
   2726     error(filename, linenum, 'build/deprecated', 3,
   2727           '>? and <? (max and min) operators are non-standard and deprecated.')
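           # For example (illustrative only), "int lo = a <? b;" uses the old GNU
           # minimum operator and would be flagged; prefer std::min(a, b).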
   2728 
   2729   if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
   2730     # TODO(unknown): Could it be expanded safely to arbitrary references,
   2731     # without triggering too many false positives? The first
   2732     # attempt triggered 5 warnings for mostly benign code in the regtest, hence
   2733     # the restriction.
   2734     # Here's the original regexp, for the reference:
   2735     # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
   2736     # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
   2737     error(filename, linenum, 'runtime/member_string_references', 2,
   2738           'const string& members are dangerous. It is much better to use '
   2739           'alternatives, such as pointers or simple constants.')
   2740 
   2741   # Everything else in this function operates on class declarations.
   2742   # Return early if the top of the nesting stack is not a class, or if
   2743   # the class head is not completed yet.
   2744   classinfo = nesting_state.InnermostClass()
   2745   if not classinfo or not classinfo.seen_open_brace:
   2746     return
   2747 
   2748   # The class may have been declared with namespace or classname qualifiers.
   2749   # The constructor and destructor will not have those qualifiers.
   2750   base_classname = classinfo.name.split('::')[-1]
   2751 
   2752   # Look for single-argument constructors that aren't marked explicit.
   2753   # Technically a valid construct, but against style.
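           # Illustrative examples (not from the original source):
           #   Foo(int x);             // flagged: should be marked explicit
           #   Foo(int x, int y = 0);  // flagged: callable with one argument
           #   explicit Foo(int x);    // OK
           #   Foo(const Foo& other);  // OK: copy constructors are exempt
           #   explicit Foo();         // flagged: zero-parameter, drop "explicit"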
   2754   explicit_constructor_match = Match(
   2755       r'\s+(?:inline\s+)?(explicit\s+)?(?:inline\s+)?%s\s*'
   2756       r'\(((?:[^()]|\([^()]*\))*)\)'
   2757       % re.escape(base_classname),
   2758       line)
   2759 
   2760   if explicit_constructor_match:
   2761     is_marked_explicit = explicit_constructor_match.group(1)
   2762 
   2763     if not explicit_constructor_match.group(2):
   2764       constructor_args = []
   2765     else:
   2766       constructor_args = explicit_constructor_match.group(2).split(',')
   2767 
    2768     # Collapse arguments so that commas in template parameter lists and
    2769     # function argument lists don't split a single argument in two.
   2770     i = 0
   2771     while i < len(constructor_args):
   2772       constructor_arg = constructor_args[i]
   2773       while (constructor_arg.count('<') > constructor_arg.count('>') or
   2774              constructor_arg.count('(') > constructor_arg.count(')')):
   2775         constructor_arg += ',' + constructor_args[i + 1]
   2776         del constructor_args[i + 1]
   2777       constructor_args[i] = constructor_arg
   2778       i += 1
   2779 
   2780     defaulted_args = [arg for arg in constructor_args if '=' in arg]
   2781     noarg_constructor = (not constructor_args or  # empty arg list
   2782                          # 'void' arg specifier
   2783                          (len(constructor_args) == 1 and
   2784                           constructor_args[0].strip() == 'void'))
   2785     onearg_constructor = ((len(constructor_args) == 1 and  # exactly one arg
   2786                            not noarg_constructor) or
   2787                           # all but at most one arg defaulted
   2788                           (len(constructor_args) >= 1 and
   2789                            not noarg_constructor and
   2790                            len(defaulted_args) >= len(constructor_args) - 1))
   2791     initializer_list_constructor = bool(
   2792         onearg_constructor and
   2793         Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0]))
   2794     copy_constructor = bool(
   2795         onearg_constructor and
   2796         Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&'
   2797               % re.escape(base_classname), constructor_args[0].strip()))
   2798 
   2799     if (not is_marked_explicit and
   2800         onearg_constructor and
   2801         not initializer_list_constructor and
   2802         not copy_constructor):
   2803       if defaulted_args:
   2804         error(filename, linenum, 'runtime/explicit', 5,
   2805               'Constructors callable with one argument '
   2806               'should be marked explicit.')
   2807       else:
   2808         error(filename, linenum, 'runtime/explicit', 5,
   2809               'Single-parameter constructors should be marked explicit.')
   2810     elif is_marked_explicit and not onearg_constructor:
   2811       if noarg_constructor:
   2812         error(filename, linenum, 'runtime/explicit', 5,
   2813               'Zero-parameter constructors should not be marked explicit.')
   2814 
   2815 
   2816 def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
   2817   """Checks for the correctness of various spacing around function calls.
   2818 
   2819   Args:
   2820     filename: The name of the current file.
   2821     clean_lines: A CleansedLines instance containing the file.
   2822     linenum: The number of the line to check.
   2823     error: The function to call with any errors found.
   2824   """
   2825   line = clean_lines.elided[linenum]
   2826 
   2827   # Since function calls often occur inside if/for/while/switch
   2828   # expressions - which have their own, more liberal conventions - we
   2829   # first see if we should be looking inside such an expression for a
   2830   # function call, to which we can apply more strict standards.
   2831   fncall = line    # if there's no control flow construct, look at whole line
   2832   for pattern in (r'\bif\s*\((.*)\)\s*{',
   2833                   r'\bfor\s*\((.*)\)\s*{',
   2834                   r'\bwhile\s*\((.*)\)\s*[{;]',
   2835                   r'\bswitch\s*\((.*)\)\s*{'):
   2836     match = Search(pattern, line)
   2837     if match:
   2838       fncall = match.group(1)    # look inside the parens for function calls
   2839       break
   2840 
   2841   # Except in if/for/while/switch, there should never be space
   2842   # immediately inside parens (eg "f( 3, 4 )").  We make an exception
    2843   # immediately inside parens (e.g. "f( 3, 4 )").  We make an exception
   2844   # a space before a ( when it's a function argument.  I assume it's a
   2845   # function argument when the char before the whitespace is legal in
   2846   # a function name (alnum + _) and we're not starting a macro. Also ignore
    2847   # pointers and references to arrays and functions because they're too tricky:
   2848   # we use a very simple way to recognize these:
   2849   # " (something)(maybe-something)" or
   2850   # " (something)(maybe-something," or
   2851   # " (something)[something]"
   2852   # Note that we assume the contents of [] to be short enough that
   2853   # they'll never need to wrap.
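           # Illustrative examples (not from the original source) of calls that the
           # checks below would flag:
           #   Foo( bar);        // Extra space after ( in function call
           #   Foo (bar);        // Extra space before ( in function call
           #   Foo(bar ).Baz();  // Extra space before )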
   2854   if (  # Ignore control structures.
   2855       not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
   2856                  fncall) and
   2857       # Ignore pointers/references to functions.
   2858       not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
   2859       # Ignore pointers/references to arrays.
   2860       not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
   2861     if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
   2862       error(filename, linenum, 'whitespace/parens', 4,
   2863             'Extra space after ( in function call')
   2864     elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
   2865       error(filename, linenum, 'whitespace/parens', 2,
   2866             'Extra space after (')
   2867     if (Search(r'\w\s+\(', fncall) and
   2868         not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and
   2869         not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and
   2870         not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and
   2871         not Search(r'\bcase\s+\(', fncall)):
   2872       # TODO(unknown): Space after an operator function seem to be a common
    2873       # TODO(unknown): Space after an operator function seems to be a common
   2874       if Search(r'\boperator_*\b', line):
   2875         error(filename, linenum, 'whitespace/parens', 0,
   2876               'Extra space before ( in function call')
   2877       else:
   2878         error(filename, linenum, 'whitespace/parens', 4,
   2879               'Extra space before ( in function call')
   2880     # If the ) is followed only by a newline or a { + newline, assume it's
   2881     # part of a control statement (if/while/etc), and don't complain
   2882     if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
   2883       # If the closing parenthesis is preceded by only whitespaces,
   2884       # try to give a more descriptive error message.
   2885       if Search(r'^\s+\)', fncall):
   2886         error(filename, linenum, 'whitespace/parens', 2,
   2887               'Closing ) should be moved to the previous line')
   2888       else:
   2889         error(filename, linenum, 'whitespace/parens', 2,
   2890               'Extra space before )')
   2891 
   2892 
   2893 def IsBlankLine(line):
   2894   """Returns true if the given line is blank.
   2895 
    2896   We consider a line to be blank if it is empty or consists only of
    2897   whitespace.
   2898 
   2899   Args:
   2900     line: A line of a string.
   2901 
   2902   Returns:
   2903     True, if the given line is blank.
   2904   """
   2905   return not line or line.isspace()
   2906 
   2907 
   2908 def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
   2909                                  error):
   2910   is_namespace_indent_item = (
   2911       len(nesting_state.stack) > 1 and
   2912       nesting_state.stack[-1].check_namespace_indentation and
   2913       isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and
   2914       nesting_state.previous_stack_top == nesting_state.stack[-2])
   2915 
   2916   if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
   2917                                      clean_lines.elided, line):
   2918     CheckItemIndentationInNamespace(filename, clean_lines.elided,
   2919                                     line, error)
   2920 
   2921 
   2922 def CheckForFunctionLengths(filename, clean_lines, linenum,
   2923                             function_state, error):
    2924   """Reports errors for long function bodies.
   2925 
   2926   For an overview why this is done, see:
   2927   https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   2928 
   2929   Uses a simplistic algorithm assuming other style guidelines
   2930   (especially spacing) are followed.
   2931   Only checks unindented functions, so class members are unchecked.
   2932   Trivial bodies are unchecked, so constructors with huge initializer lists
   2933   may be missed.
   2934   Blank/comment lines are not counted so as to avoid encouraging the removal
   2935   of vertical space and comments just to get through a lint check.
   2936   NOLINT *on the last line of a function* disables this check.
   2937 
   2938   Args:
   2939     filename: The name of the current file.
   2940     clean_lines: A CleansedLines instance containing the file.
   2941     linenum: The number of the line to check.
   2942     function_state: Current function name and lines in body so far.
   2943     error: The function to call with any errors found.
   2944   """
   2945   lines = clean_lines.lines
   2946   line = lines[linenum]
   2947   joined_line = ''
   2948 
   2949   starting_func = False
   2950   regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
   2951   match_result = Match(regexp, line)
   2952   if match_result:
   2953     # If the name is all caps and underscores, figure it's a macro and
   2954     # ignore it, unless it's TEST or TEST_F.
   2955     function_name = match_result.group(1).split()[-1]
   2956     if function_name == 'TEST' or function_name == 'TEST_F' or (
   2957         not Match(r'[A-Z_]+$', function_name)):
   2958       starting_func = True
   2959 
   2960   if starting_func:
   2961     body_found = False
   2962     for start_linenum in xrange(linenum, clean_lines.NumLines()):
   2963       start_line = lines[start_linenum]
   2964       joined_line += ' ' + start_line.lstrip()
   2965       if Search(r'(;|})', start_line):  # Declarations and trivial functions
   2966         body_found = True
   2967         break                              # ... ignore
   2968       elif Search(r'{', start_line):
   2969         body_found = True
   2970         function = Search(r'((\w|:)*)\(', line).group(1)
   2971         if Match(r'TEST', function):    # Handle TEST... macros
   2972           parameter_regexp = Search(r'(\(.*\))', joined_line)
   2973           if parameter_regexp:             # Ignore bad syntax
   2974             function += parameter_regexp.group(1)
   2975         else:
   2976           function += '()'
   2977         function_state.Begin(function)
   2978         break
   2979     if not body_found:
   2980       # No body for the function (or evidence of a non-function) was found.
   2981       error(filename, linenum, 'readability/fn_size', 5,
   2982             'Lint failed to find start of function body.')
   2983   elif Match(r'^\}\s*$', line):  # function end
   2984     function_state.Check(error, filename, linenum)
   2985     function_state.End()
   2986   elif not Match(r'^\s*$', line):
   2987     function_state.Count()  # Count non-blank/non-comment lines.
   2988 
   2989 
   2990 _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
   2991 
   2992 
   2993 def CheckComment(line, filename, linenum, next_line_start, error):
   2994   """Checks for common mistakes in comments.
   2995 
   2996   Args:
   2997     line: The line in question.
   2998     filename: The name of the current file.
   2999     linenum: The number of the line to check.
   3000     next_line_start: The first non-whitespace column of the next line.
   3001     error: The function to call with any errors found.
   3002   """
   3003   commentpos = line.find('//')
   3004   if commentpos != -1:
   3005     # Check if the // may be in quotes.  If so, ignore it
   3006     if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0:
   3007       # Allow one space for new scopes, two spaces otherwise:
   3008       if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and
   3009           ((commentpos >= 1 and
   3010             line[commentpos-1] not in string.whitespace) or
   3011            (commentpos >= 2 and
   3012             line[commentpos-2] not in string.whitespace))):
   3013         error(filename, linenum, 'whitespace/comments', 2,
   3014               'At least two spaces is best between code and comments')
   3015 
   3016       # Checks for common mistakes in TODO comments.
   3017       comment = line[commentpos:]
   3018       match = _RE_PATTERN_TODO.match(comment)
   3019       if match:
   3020         # One whitespace is correct; zero whitespace is handled elsewhere.
   3021         leading_whitespace = match.group(1)
   3022         if len(leading_whitespace) > 1:
   3023           error(filename, linenum, 'whitespace/todo', 2,
   3024                 'Too many spaces before TODO')
   3025 
   3026         username = match.group(2)
   3027         if not username:
   3028           error(filename, linenum, 'readability/todo', 2,
   3029                 'Missing username in TODO; it should look like '
   3030                 '"// TODO(my_username): Stuff."')
   3031 
   3032         middle_whitespace = match.group(3)
   3033         # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison
   3034         if middle_whitespace != ' ' and middle_whitespace != '':
   3035           error(filename, linenum, 'whitespace/todo', 2,
   3036                 'TODO(my_username) should be followed by a space')
   3037 
   3038       # If the comment contains an alphanumeric character, there
   3039       # should be a space somewhere between it and the // unless
   3040       # it's a /// or //! Doxygen comment.
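               # Illustrative examples (not from the original source):
               #   "//comment"         -> flagged: no space between // and the text
               #   "// comment"        -> OK
               #   "/// Doxygen text"  -> OK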
   3041       if (Match(r'//[^ ]*\w', comment) and
   3042           not Match(r'(///|//\!)(\s+|$)', comment)):
   3043         error(filename, linenum, 'whitespace/comments', 4,
   3044               'Should have a space between // and comment')
   3045 
   3046 
   3047 def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
   3048   """Checks for improper use of DISALLOW* macros.
   3049 
   3050   Args:
   3051     filename: The name of the current file.
   3052     clean_lines: A CleansedLines instance containing the file.
   3053     linenum: The number of the line to check.
   3054     nesting_state: A NestingState instance which maintains information about
   3055                    the current stack of nested blocks being parsed.
   3056     error: The function to call with any errors found.
   3057   """
   3058   line = clean_lines.elided[linenum]  # get rid of comments and strings
   3059 
   3060   matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
   3061                    r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
   3062   if not matched:
   3063     return
   3064   if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
   3065     if nesting_state.stack[-1].access != 'private':
   3066       error(filename, linenum, 'readability/constructors', 3,
   3067             '%s must be in the private: section' % matched.group(1))
   3068 
   3069   else:
   3070     # Found DISALLOW* macro outside a class declaration, or perhaps it
   3071     # was used inside a function when it should have been part of the
   3072     # class declaration.  We could issue a warning here, but it
   3073     # probably resulted in a compiler error already.
   3074     pass
   3075 
   3076 
   3077 def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
   3078   """Checks for the correctness of various spacing issues in the code.
   3079 
   3080   Things we check for: spaces around operators, spaces after
   3081   if/for/while/switch, no spaces around parens in function calls, two
   3082   spaces between code and comment, don't start a block with a blank
   3083   line, don't end a function with a blank line, don't add a blank line
   3084   after public/protected/private, don't have too many blank lines in a row.
   3085 
   3086   Args:
   3087     filename: The name of the current file.
   3088     clean_lines: A CleansedLines instance containing the file.
   3089     linenum: The number of the line to check.
   3090     nesting_state: A NestingState instance which maintains information about
   3091                    the current stack of nested blocks being parsed.
   3092     error: The function to call with any errors found.
   3093   """
   3094 
   3095   # Don't use "elided" lines here, otherwise we can't check commented lines.
   3096   # Don't want to use "raw" either, because we don't want to check inside C++11
    3097   # raw strings.
   3098   raw = clean_lines.lines_without_raw_strings
   3099   line = raw[linenum]
   3100 
   3101   # Before nixing comments, check if the line is blank for no good
   3102   # reason.  This includes the first line after a block is opened, and
    3103   # blank lines at the end of a function (i.e., right before a line like '}').
   3104   #
   3105   # Skip all the blank line checks if we are immediately inside a
   3106   # namespace body.  In other words, don't issue blank line warnings
   3107   # for this block:
   3108   #   namespace {
   3109   #
   3110   #   }
   3111   #
   3112   # A warning about missing end of namespace comments will be issued instead.
   3113   #
   3114   # Also skip blank line checks for 'extern "C"' blocks, which are formatted
   3115   # like namespaces.
   3116   if (IsBlankLine(line) and
   3117       not nesting_state.InNamespaceBody() and
   3118       not nesting_state.InExternC()):
   3119     elided = clean_lines.elided
   3120     prev_line = elided[linenum - 1]
   3121     prevbrace = prev_line.rfind('{')
   3122     # TODO(unknown): Don't complain if line before blank line, and line after,
   3123     #                both start with alnums and are indented the same amount.
   3124     #                This ignores whitespace at the start of a namespace block
   3125     #                because those are not usually indented.
   3126     if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
   3127       # OK, we have a blank line at the start of a code block.  Before we
   3128       # complain, we check if it is an exception to the rule: The previous
   3129       # non-empty line has the parameters of a function header that are indented
    3130       # 4 spaces (because they did not fit in an 80-column line when placed on
   3131       # the same line as the function name).  We also check for the case where
   3132       # the previous line is indented 6 spaces, which may happen when the
    3133       # initializers of a constructor do not fit into an 80-column line.
   3134       exception = False
   3135       if Match(r' {6}\w', prev_line):  # Initializer list?
   3136         # We are looking for the opening column of initializer list, which
   3137         # should be indented 4 spaces to cause 6 space indentation afterwards.
   3138         search_position = linenum-2
   3139         while (search_position >= 0
   3140                and Match(r' {6}\w', elided[search_position])):
   3141           search_position -= 1
   3142         exception = (search_position >= 0
   3143                      and elided[search_position][:5] == '    :')
   3144       else:
   3145         # Search for the function arguments or an initializer list.  We use a
   3146         # simple heuristic here: If the line is indented 4 spaces; and we have a
   3147         # closing paren, without the opening paren, followed by an opening brace
   3148         # or colon (for initializer lists) we assume that it is the last line of
   3149         # a function header.  If we have a colon indented 4 spaces, it is an
   3150         # initializer list.
   3151         exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   3152                            prev_line)
   3153                      or Match(r' {4}:', prev_line))
   3154 
   3155       if not exception:
   3156         error(filename, linenum, 'whitespace/blank_line', 2,
   3157               'Redundant blank line at the start of a code block '
   3158               'should be deleted.')
   3159     # Ignore blank lines at the end of a block in a long if-else
   3160     # chain, like this:
   3161     #   if (condition1) {
   3162     #     // Something followed by a blank line
   3163     #
   3164     #   } else if (condition2) {
   3165     #     // Something else
   3166     #   }
   3167     if linenum + 1 < clean_lines.NumLines():
   3168       next_line = raw[linenum + 1]
   3169       if (next_line
   3170           and Match(r'\s*}', next_line)
   3171           and next_line.find('} else ') == -1):
   3172         error(filename, linenum, 'whitespace/blank_line', 3,
   3173               'Redundant blank line at the end of a code block '
   3174               'should be deleted.')
   3175 
   3176     matched = Match(r'\s*(public|protected|private):', prev_line)
   3177     if matched:
   3178       error(filename, linenum, 'whitespace/blank_line', 3,
   3179             'Do not leave a blank line after "%s:"' % matched.group(1))
   3180 
   3181   # Next, check comments
   3182   next_line_start = 0
   3183   if linenum + 1 < clean_lines.NumLines():
   3184     next_line = raw[linenum + 1]
   3185     next_line_start = len(next_line) - len(next_line.lstrip())
   3186   CheckComment(line, filename, linenum, next_line_start, error)
   3187 
   3188   # get rid of comments and strings
   3189   line = clean_lines.elided[linenum]
   3190 
   3191   # You shouldn't have spaces before your brackets, except maybe after
   3192   # 'delete []' or 'return []() {};'
   3193   if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line):
   3194     error(filename, linenum, 'whitespace/braces', 5,
   3195           'Extra space before [')
   3196 
    3197   # In a range-based for loop, we want spaces before and after the colon, but
   3198   # not around "::" tokens that might appear.
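           # For example (illustrative only), "for (auto x: v)" and "for (auto x :v)"
           # would be flagged, while "for (auto x : v)" would not.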
   3199   if (Search(r'for *\(.*[^:]:[^: ]', line) or
   3200       Search(r'for *\(.*[^: ]:[^:]', line)):
   3201     error(filename, linenum, 'whitespace/forcolon', 2,
   3202           'Missing space around colon in range-based for loop')
   3203 
   3204 
   3205 def CheckOperatorSpacing(filename, clean_lines, linenum, error):
   3206   """Checks for horizontal spacing around operators.
   3207 
   3208   Args:
   3209     filename: The name of the current file.
   3210     clean_lines: A CleansedLines instance containing the file.
   3211     linenum: The number of the line to check.
   3212     error: The function to call with any errors found.
   3213   """
   3214   line = clean_lines.elided[linenum]
   3215 
   3216   # Don't try to do spacing checks for operator methods.  Do this by
   3217   # replacing the troublesome characters with something else,
   3218   # preserving column position for all other characters.
   3219   #
   3220   # The replacement is done repeatedly to avoid false positives from
   3221   # operators that call operators.
   3222   while True:
   3223     match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line)
   3224     if match:
   3225       line = match.group(1) + ('_' * len(match.group(2))) + match.group(3)
   3226     else:
   3227       break
   3228 
   3229   # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
   3230   # Otherwise not.  Note we only check for non-spaces on *both* sides;
   3231   # sometimes people put non-spaces on one side when aligning ='s among
   3232   # many lines (not that this is behavior that I approve of...)
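           # For example (illustrative only), "i=0;" would be flagged here, while
           # "if ( (a=Foo()) == 0 )" and "x += 2;" would not.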
   3233   if ((Search(r'[\w.]=', line) or
   3234        Search(r'=[\w.]', line))
   3235       and not Search(r'\b(if|while|for) ', line)
   3236       # Operators taken from [lex.operators] in C++11 standard.
   3237       and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line)
   3238       and not Search(r'operator=', line)):
   3239     error(filename, linenum, 'whitespace/operators', 4,
   3240           'Missing spaces around =')
   3241 
   3242   # It's ok not to have spaces around binary operators like + - * /, but if
   3243   # there's too little whitespace, we get concerned.  It's hard to tell,
   3244   # though, so we punt on this one for now.  TODO.
   3245 
   3246   # You should always have whitespace around binary operators.
   3247   #
   3248   # Check <= and >= first to avoid false positives with < and >, then
   3249   # check non-include lines for spacing around < and >.
   3250   #
    3251   # If the operator is followed by a comma, assume it's being used in a
   3252   # macro context and don't do any checks.  This avoids false
   3253   # positives.
   3254   #
   3255   # Note that && is not included here.  This is because there are too
   3256   # many false positives due to RValue references.
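           # For example (illustrative only), "if (a==b)" and "x<=y" would be
           # flagged, while "a == b" and a macro argument like "OP(==, 2)" would not.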
   3257   match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
   3258   if match:
   3259     error(filename, linenum, 'whitespace/operators', 3,
   3260           'Missing spaces around %s' % match.group(1))
   3261   elif not Match(r'#.*include', line):
   3262     # Look for < that is not surrounded by spaces.  This is only
   3263     # triggered if both sides are missing spaces, even though
    3264     # technically we should flag if at least one side is missing a
   3265     # space.  This is done to avoid some false positives with shifts.
   3266     match = Match(r'^(.*[^\s<])<[^\s=<,]', line)
   3267     if match:
   3268       (_, _, end_pos) = CloseExpression(
   3269           clean_lines, linenum, len(match.group(1)))
   3270       if end_pos <= -1:
   3271         error(filename, linenum, 'whitespace/operators', 3,
   3272               'Missing spaces around <')
   3273 
   3274     # Look for > that is not surrounded by spaces.  Similar to the
   3275     # above, we only trigger if both sides are missing spaces to avoid
   3276     # false positives with shifts.
   3277     match = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
   3278     if match:
   3279       (_, _, start_pos) = ReverseCloseExpression(
   3280           clean_lines, linenum, len(match.group(1)))
   3281       if start_pos <= -1:
   3282         error(filename, linenum, 'whitespace/operators', 3,
   3283               'Missing spaces around >')
   3284 
   3285   # We allow no-spaces around << when used like this: 10<<20, but
   3286   # not otherwise (particularly, not when used as streams)
   3287   #
   3288   # We also allow operators following an opening parenthesis, since
   3289   # those tend to be macros that deal with operators.
   3290   match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line)
   3291   if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and
   3292       not (match.group(1) == 'operator' and match.group(2) == ';')):
   3293     error(filename, linenum, 'whitespace/operators', 3,
   3294           'Missing spaces around <<')
   3295 
   3296   # We allow no-spaces around >> for almost anything.  This is because
   3297   # C++11 allows ">>" to close nested templates, which accounts for
   3298   # most cases when ">>" is not followed by a space.
   3299   #
   3300   # We still warn on ">>" followed by alpha character, because that is
   3301   # likely due to ">>" being used for right shifts, e.g.:
   3302   #   value >> alpha
   3303   #
   3304   # When ">>" is used to close templates, the alphanumeric letter that
   3305   # follows would be part of an identifier, and there should still be
   3306   # a space separating the template type and the identifier.
   3307   #   type<type<type>> alpha
   3308   match = Search(r'>>[a-zA-Z_]', line)
   3309   if match:
   3310     error(filename, linenum, 'whitespace/operators', 3,
   3311           'Missing spaces around >>')
   3312 
    3313   # There shouldn't be a space around unary operators
   3314   match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   3315   if match:
   3316     error(filename, linenum, 'whitespace/operators', 4,
   3317           'Extra space for operator %s' % match.group(1))
   3318 
   3319 
   3320 def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
   3321   """Checks for horizontal spacing around parentheses.
   3322 
   3323   Args:
   3324     filename: The name of the current file.
   3325     clean_lines: A CleansedLines instance containing the file.
   3326     linenum: The number of the line to check.
   3327     error: The function to call with any errors found.
   3328   """
   3329   line = clean_lines.elided[linenum]
   3330 
   3331   # No spaces after an if, while, switch, or for
   3332   match = Search(r' (if\(|for\(|while\(|switch\()', line)
   3333   if match:
   3334     error(filename, linenum, 'whitespace/parens', 5,
   3335           'Missing space before ( in %s' % match.group(1))
   3336 
   3337   # For if/for/while/switch, the left and right parens should be
   3338   # consistent about how many spaces are inside the parens, and
   3339   # there should either be zero or one spaces inside the parens.
   3340   # We don't want: "if ( foo)" or "if ( foo   )".
   3341   # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
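           # Illustrative examples (not from the original source):
           #   if ( foo) {         -> Mismatching spaces inside ()
           #   if (  foo  ) {      -> Should have zero or one spaces inside ( and )
           #   for (foo; bar; ) {  -> allowed by the exception above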
   3342   match = Search(r'\b(if|for|while|switch)\s*'
   3343                  r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
   3344                  line)
   3345   if match:
   3346     if len(match.group(2)) != len(match.group(4)):
   3347       if not (match.group(3) == ';' and
   3348               len(match.group(2)) == 1 + len(match.group(4)) or
   3349               not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
   3350         error(filename, linenum, 'whitespace/parens', 5,
   3351               'Mismatching spaces inside () in %s' % match.group(1))
   3352     if len(match.group(2)) not in [0, 1]:
   3353       error(filename, linenum, 'whitespace/parens', 5,
   3354             'Should have zero or one spaces inside ( and ) in %s' %
   3355             match.group(1))
   3356 
   3357 
   3358 def CheckCommaSpacing(filename, clean_lines, linenum, error):
   3359   """Checks for horizontal spacing near commas and semicolons.
   3360 
   3361   Args:
   3362     filename: The name of the current file.
   3363     clean_lines: A CleansedLines instance containing the file.
   3364     linenum: The number of the line to check.
   3365     error: The function to call with any errors found.
   3366   """
   3367   raw = clean_lines.lines_without_raw_strings
   3368   line = clean_lines.elided[linenum]
   3369 
   3370   # You should always have a space after a comma (either as fn arg or operator)
   3371   #
   3372   # This does not apply when the non-space character following the
   3373   # comma is another comma, since the only time when that happens is
   3374   # for empty macro arguments.
   3375   #
   3376   # We run this check in two passes: first pass on elided lines to
   3377   # verify that lines contain missing whitespaces, second pass on raw
   3378   # lines to confirm that those missing whitespaces are not due to
   3379   # elided comments.
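           # For example (illustrative only), "Foo(a,b)" would be flagged, while
           # "Foo(a, b)" would not.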
   3380   if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and
   3381       Search(r',[^,\s]', raw[linenum])):
   3382     error(filename, linenum, 'whitespace/comma', 3,
   3383           'Missing space after ,')
   3384 
   3385   # You should always have a space after a semicolon
   3386   # except for few corner cases
    3387   # TODO(unknown): clarify whether 'if (1) { return 1;}' requires one more
   3388   # space after ;
   3389   if Search(r';[^\s};\\)/]', line):
   3390     error(filename, linenum, 'whitespace/semicolon', 3,
   3391           'Missing space after ;')
   3392 
   3393 
   3394 def _IsType(clean_lines, nesting_state, expr):
   3395   """Check if expression looks like a type name, returns true if so.
   3396 
   3397   Args:
   3398     clean_lines: A CleansedLines instance containing the file.
   3399     nesting_state: A NestingState instance which maintains information about
   3400                    the current stack of nested blocks being parsed.
   3401     expr: The expression to check.
   3402   Returns:
   3403     True, if token looks like a type.
   3404   """
   3405   # Keep only the last token in the expression
   3406   last_word = Match(r'^.*(\b\S+)$', expr)
   3407   if last_word:
   3408     token = last_word.group(1)
   3409   else:
   3410     token = expr
   3411 
   3412   # Match native types and stdint types
   3413   if _TYPES.match(token):
   3414     return True
   3415 
   3416   # Try a bit harder to match templated types.  Walk up the nesting
   3417   # stack until we find something that resembles a typename
   3418   # declaration for what we are looking for.
   3419   typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
   3420                       r'\b')
   3421   block_index = len(nesting_state.stack) - 1
   3422   while block_index >= 0:
   3423     if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
   3424       return False
   3425 
   3426     # Found where the opening brace is.  We want to scan from this
   3427     # line up to the beginning of the function, minus a few lines.
   3428     #   template <typename Type1,  // stop scanning here
   3429     #             ...>
   3430     #   class C
   3431     #     : public ... {  // start scanning here
   3432     last_line = nesting_state.stack[block_index].starting_linenum
   3433 
   3434     next_block_start = 0
   3435     if block_index > 0:
   3436       next_block_start = nesting_state.stack[block_index - 1].starting_linenum
   3437     first_line = last_line
   3438     while first_line >= next_block_start:
   3439       if clean_lines.elided[first_line].find('template') >= 0:
   3440         break
   3441       first_line -= 1
   3442     if first_line < next_block_start:
   3443       # Didn't find any "template" keyword before reaching the next block,
   3444       # there are probably no template things to check for this block
   3445       block_index -= 1
   3446       continue
   3447 
   3448     # Look for typename in the specified range
   3449     for i in xrange(first_line, last_line + 1, 1):
   3450       if Search(typename_pattern, clean_lines.elided[i]):
   3451         return True
   3452     block_index -= 1
   3453 
   3454   return False
   3455 
   3456 
   3457 def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
    3458   """Checks for horizontal spacing near braces and semicolons.
   3459 
   3460   Args:
   3461     filename: The name of the current file.
   3462     clean_lines: A CleansedLines instance containing the file.
   3463     linenum: The number of the line to check.
   3464     nesting_state: A NestingState instance which maintains information about
   3465                    the current stack of nested blocks being parsed.
   3466     error: The function to call with any errors found.
   3467   """
   3468   line = clean_lines.elided[linenum]
   3469 
   3470   # Except after an opening paren, or after another opening brace (in case of
   3471   # an initializer list, for instance), you should have spaces before your
   3472   # braces when they are delimiting blocks, classes, namespaces etc.
   3473   # And since you should never have braces at the beginning of a line,
   3474   # this is an easy test.  Except that braces used for initialization don't
   3475   # follow the same rule; we often don't want spaces before those.
   3476   match = Match(r'^(.*[^ ({>]){', line)
   3477 
   3478   if match:
   3479     # Try a bit harder to check for brace initialization.  This
   3480     # happens in one of the following forms:
   3481     #   Constructor() : initializer_list_{} { ... }
   3482     #   Constructor{}.MemberFunction()
   3483     #   Type variable{};
   3484     #   FunctionCall(type{}, ...);
   3485     #   LastArgument(..., type{});
   3486     #   LOG(INFO) << type{} << " ...";
   3487     #   map_of_type[{...}] = ...;
   3488     #   ternary = expr ? new type{} : nullptr;
   3489     #   OuterTemplate<InnerTemplateConstructor<Type>{}>
   3490     #
   3491     # We check for the character following the closing brace, and
   3492     # silence the warning if it's one of those listed above, i.e.
   3493     # "{.;,)<>]:".
   3494     #
   3495     # To account for nested initializer list, we allow any number of
   3496     # closing braces up to "{;,)<".  We can't simply silence the
   3497     # warning on first sight of closing brace, because that would
   3498     # cause false negatives for things that are not initializer lists.
   3499     #   Silence this:         But not this:
   3500     #     Outer{                if (...) {
   3501     #       Inner{...}            if (...){  // Missing space before {
   3502     #     };                    }
   3503     #
   3504     # There is a false negative with this approach if people inserted
   3505     # spurious semicolons, e.g. "if (cond){};", but we will catch the
   3506     # spurious semicolon with a separate check.
   3507     leading_text = match.group(1)
   3508     (endline, endlinenum, endpos) = CloseExpression(
   3509         clean_lines, linenum, len(match.group(1)))
   3510     trailing_text = ''
   3511     if endpos > -1:
   3512       trailing_text = endline[endpos:]
   3513     for offset in xrange(endlinenum + 1,
   3514                          min(endlinenum + 3, clean_lines.NumLines() - 1)):
   3515       trailing_text += clean_lines.elided[offset]
   3516     # We also suppress warnings for `uint64_t{expression}` etc., as the style
   3517     # guide recommends brace initialization for integral types to avoid
   3518     # overflow/truncation.
   3519     if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text)
   3520         and not _IsType(clean_lines, nesting_state, leading_text)):
   3521       error(filename, linenum, 'whitespace/braces', 5,
   3522             'Missing space before {')
   3523 
   3524   # Make sure '} else {' has spaces.
   3525   if Search(r'}else', line):
   3526     error(filename, linenum, 'whitespace/braces', 5,
   3527           'Missing space before else')
   3528 
   3529   # You shouldn't have a space before a semicolon at the end of the line.
   3530   # There's a special case for "for" since the style guide allows space before
   3531   # the semicolon there.
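           # Illustrative examples (not from the original source):
           #   default: ;   -> Semicolon defining empty statement
           #   ;            -> Line contains only semicolon
           #   return 0 ;   -> Extra space before last semicolon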
   3532   if Search(r':\s*;\s*$', line):
   3533     error(filename, linenum, 'whitespace/semicolon', 5,
   3534           'Semicolon defining empty statement. Use {} instead.')
   3535   elif Search(r'^\s*;\s*$', line):
   3536     error(filename, linenum, 'whitespace/semicolon', 5,
   3537           'Line contains only semicolon. If this should be an empty statement, '
   3538           'use {} instead.')
   3539   elif (Search(r'\s+;\s*$', line) and
   3540         not Search(r'\bfor\b', line)):
   3541     error(filename, linenum, 'whitespace/semicolon', 5,
   3542           'Extra space before last semicolon. If this should be an empty '
   3543           'statement, use {} instead.')
   3544 
   3545 
   3546 def IsDecltype(clean_lines, linenum, column):
   3547   """Check if the token ending on (linenum, column) is decltype().
   3548 
   3549   Args:
   3550     clean_lines: A CleansedLines instance containing the file.
   3551     linenum: the number of the line to check.
   3552     column: end column of the token to check.
   3553   Returns:
   3554     True if this token is decltype() expression, False otherwise.
   3555   """
   3556   (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column)
   3557   if start_col < 0:
   3558     return False
   3559   if Search(r'\bdecltype\s*$', text[0:start_col]):
   3560     return True
   3561   return False
   3562 
   3563 
   3564 def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
   3565   """Checks for additional blank line issues related to sections.
   3566 
   3567   Currently the only thing checked here is blank line before protected/private.
   3568 
   3569   Args:
   3570     filename: The name of the current file.
   3571     clean_lines: A CleansedLines instance containing the file.
    3572     class_info: A _ClassInfo object.
   3573     linenum: The number of the line to check.
   3574     error: The function to call with any errors found.
   3575   """
   3576   # Skip checks if the class is small, where small means 25 lines or less.
   3577   # 25 lines seems like a good cutoff since that's the usual height of
   3578   # terminals, and any class that can't fit in one screen can't really
   3579   # be considered "small".
   3580   #
   3581   # Also skip checks if we are on the first line.  This accounts for
   3582   # classes that look like
   3583   #   class Foo { public: ... };
   3584   #
   3585   # If we didn't find the end of the class, last_line would be zero,
   3586   # and the check will be skipped by the first condition.
   3587   if (class_info.last_line - class_info.starting_linenum <= 24 or
   3588       linenum <= class_info.starting_linenum):
   3589     return
   3590 
   3591   matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
   3592   if matched:
   3593     # Issue warning if the line before public/protected/private was
   3594     # not a blank line, but don't do this if the previous line contains
   3595     # "class" or "struct".  This can happen two ways:
   3596     #  - We are at the beginning of the class.
   3597     #  - We are forward-declaring an inner class that is semantically
   3598     #    private, but needed to be public for implementation reasons.
   3599     # Also ignores cases where the previous line ends with a backslash as can be
   3600     # common when defining classes in C macros.
   3601     prev_line = clean_lines.lines[linenum - 1]
   3602     if (not IsBlankLine(prev_line) and
   3603         not Search(r'\b(class|struct)\b', prev_line) and
   3604         not Search(r'\\$', prev_line)):
   3605       # Try a bit harder to find the beginning of the class.  This is to
   3606       # account for multi-line base-specifier lists, e.g.:
   3607       #   class Derived
   3608       #       : public Base {
   3609       end_class_head = class_info.starting_linenum
   3610       for i in range(class_info.starting_linenum, linenum):
   3611         if Search(r'\{\s*$', clean_lines.lines[i]):
   3612           end_class_head = i
   3613           break
   3614       if end_class_head < linenum - 1:
   3615         error(filename, linenum, 'whitespace/blank_line', 3,
   3616               '"%s:" should be preceded by a blank line' % matched.group(1))
   3617 
   3618 
   3619 def GetPreviousNonBlankLine(clean_lines, linenum):
   3620   """Return the most recent non-blank line and its line number.
   3621 
   3622   Args:
   3623     clean_lines: A CleansedLines instance containing the file contents.
   3624     linenum: The number of the line to check.
   3625 
   3626   Returns:
   3627     A tuple with two elements.  The first element is the contents of the last
   3628     non-blank line before the current line, or the empty string if this is the
   3629     first non-blank line.  The second is the line number of that line, or -1
   3630     if this is the first non-blank line.
   3631   """
   3632 
   3633   prevlinenum = linenum - 1
   3634   while prevlinenum >= 0:
   3635     prevline = clean_lines.elided[prevlinenum]
   3636     if not IsBlankLine(prevline):     # if not a blank line...
   3637       return (prevline, prevlinenum)
   3638     prevlinenum -= 1
   3639   return ('', -1)
   3640 
   3641 
   3642 def CheckBraces(filename, clean_lines, linenum, error):
   3643   """Looks for misplaced braces (e.g. at the end of line).
   3644 
   3645   Args:
   3646     filename: The name of the current file.
   3647     clean_lines: A CleansedLines instance containing the file.
   3648     linenum: The number of the line to check.
   3649     error: The function to call with any errors found.
   3650   """
   3651 
   3652   line = clean_lines.elided[linenum]        # get rid of comments and strings
   3653 
   3654   if Match(r'\s*{\s*$', line):
   3655     # We allow an open brace to start a line in the case where someone is using
   3656     # braces in a block to explicitly create a new scope, which is commonly used
   3657     # to control the lifetime of stack-allocated variables.  Braces are also
   3658     # used for brace initializers inside function calls.  We don't detect this
   3659     # perfectly: we just don't complain if the last non-whitespace character on
   3660     # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
   3661     # previous line starts a preprocessor block. We also allow a brace on the
   3662     # following line if it is part of an array initialization and would not fit
   3663     # within the 80 character limit of the preceding line.
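             # For example (illustrative only), a '{' on a line of its own right
             # after "if (cond)" would be flagged, while a scoping '{' that follows
             # a line ending in ';', '{' or a preprocessor directive would not.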
   3664     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   3665     if (not Search(r'[,;:}{(]\s*$', prevline) and
   3666         not Match(r'\s*#', prevline) and
   3667         not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)):
   3668       error(filename, linenum, 'whitespace/braces', 4,
   3669             '{ should almost always be at the end of the previous line')
   3670 
   3671   # An else clause should be on the same line as the preceding closing brace.
   3672   if Match(r'\s*else\b\s*(?:if\b|\{|$)', line):
   3673     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   3674     if Match(r'\s*}\s*$', prevline):
   3675       error(filename, linenum, 'whitespace/newline', 4,
   3676             'An else should appear on the same line as the preceding }')
   3677 
   3678   # If braces come on one side of an else, they should be on both.
   3679   # However, we have to worry about "else if" that spans multiple lines!
   3680   if Search(r'else if\s*\(', line):       # could be multi-line if
   3681     brace_on_left = bool(Search(r'}\s*else if\s*\(', line))
   3682     # find the ( after the if
   3683     pos = line.find('else if')
   3684     pos = line.find('(', pos)
   3685     if pos > 0:
   3686       (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
   3687       brace_on_right = endline[endpos:].find('{') != -1
   3688       if brace_on_left != brace_on_right:    # must be brace after if
   3689         error(filename, linenum, 'readability/braces', 5,
   3690               'If an else has a brace on one side, it should have it on both')
   3691   elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
   3692     error(filename, linenum, 'readability/braces', 5,
   3693           'If an else has a brace on one side, it should have it on both')
   3694 
   3695   # Likewise, an else should never have the else clause on the same line
   3696   if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
   3697     error(filename, linenum, 'whitespace/newline', 4,
   3698           'Else clause should never be on same line as else (use 2 lines)')
   3699 
   3700   # In the same way, a do/while should never be on one line
   3701   if Match(r'\s*do [^\s{]', line):
   3702     error(filename, linenum, 'whitespace/newline', 4,
   3703           'do/while clauses should not be on a single line')
   3704 
   3705   # Check single-line if/else bodies. The style guide says 'curly braces are not
   3706   # required for single-line statements'. We additionally allow multi-line,
   3707   # single statements, but we reject anything with more than one semicolon in
   3708   # it. This means that the first semicolon after the if should be at the end of
   3709   # its line, and the line after that should have an indent level equal to or
   3710   # lower than the if. We also check for ambiguous if/else nesting without
   3711   # braces.
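           # Illustrative example (not from the original source) of the ambiguous
           # nesting that the dedent check below reports:
           #   if (a)
           #     if (b)
           #       Foo();
           #   else        // indented like the outer if, but binds to the inner one
           #     Bar();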
   3712   if_else_match = Search(r'\b(if\s*\(|else\b)', line)
   3713   if if_else_match and not Match(r'\s*#', line):
   3714     if_indent = GetIndentLevel(line)
   3715     endline, endlinenum, endpos = line, linenum, if_else_match.end()
   3716     if_match = Search(r'\bif\s*\(', line)
   3717     if if_match:
   3718       # This could be a multiline if condition, so find the end first.
   3719       pos = if_match.end() - 1
   3720       (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos)
   3721     # Check for an opening brace, either directly after the if or on the next
   3722     # line. If found, this isn't a single-statement conditional.
   3723     if (not Match(r'\s*{', endline[endpos:])
   3724         and not (Match(r'\s*$', endline[endpos:])
   3725                  and endlinenum < (len(clean_lines.elided) - 1)
   3726                  and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))):
   3727       while (endlinenum < len(clean_lines.elided)
   3728              and ';' not in clean_lines.elided[endlinenum][endpos:]):
   3729         endlinenum += 1
   3730         endpos = 0
   3731       if endlinenum < len(clean_lines.elided):
   3732         endline = clean_lines.elided[endlinenum]
   3733         # We allow a mix of whitespace and closing braces (e.g. for one-liner
   3734         # methods) and a single \ after the semicolon (for macros)
   3735         endpos = endline.find(';')
   3736         if not Match(r';[\s}]*(\\?)$', endline[endpos:]):
   3737           # Semicolon isn't the last character, there's something trailing.
   3738           # Output a warning if the semicolon is not contained inside
   3739           # a lambda expression.
   3740           if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$',
   3741                        endline):
   3742             error(filename, linenum, 'readability/braces', 4,
   3743                   'If/else bodies with multiple statements require braces')
   3744         elif endlinenum < len(clean_lines.elided) - 1:
   3745           # Make sure the next line is dedented
   3746           next_line = clean_lines.elided[endlinenum + 1]
   3747           next_indent = GetIndentLevel(next_line)
   3748           # With ambiguous nested if statements, this will error out on the
   3749           # if that *doesn't* match the else, regardless of whether it's the
   3750           # inner one or outer one.
   3751           if (if_match and Match(r'\s*else\b', next_line)
   3752               and next_indent != if_indent):
   3753             error(filename, linenum, 'readability/braces', 4,
   3754                   'Else clause should be indented at the same level as if. '
   3755                   'Ambiguous nested if/else chains require braces.')
   3756           elif next_indent > if_indent:
   3757             error(filename, linenum, 'readability/braces', 4,
   3758                   'If/else bodies with multiple statements require braces')
   3759 
   3760 
   3761 def CheckTrailingSemicolon(filename, clean_lines, linenum, error):
   3762   """Looks for redundant trailing semicolon.
   3763 
   3764   Args:
   3765     filename: The name of the current file.
   3766     clean_lines: A CleansedLines instance containing the file.
   3767     linenum: The number of the line to check.
   3768     error: The function to call with any errors found.
   3769   """
   3770 
   3771   line = clean_lines.elided[linenum]
   3772 
   3773   # Block bodies should not be followed by a semicolon.  Due to C++11
   3774   # brace initialization, there are more places where semicolons are
   3775   # required than not, so we use a whitelist approach to check these
   3776   # rather than a blacklist.  These are the places where "};" should
   3777   # be replaced by just "}":
   3778   # 1. Some flavor of block following closing parenthesis:
   3779   #    for (;;) {};
   3780   #    while (...) {};
   3781   #    switch (...) {};
   3782   #    Function(...) {};
   3783   #    if (...) {};
   3784   #    if (...) else if (...) {};
   3785   #
   3786   # 2. else block:
   3787   #    if (...) else {};
   3788   #
   3789   # 3. const member function:
   3790   #    Function(...) const {};
   3791   #
   3792   # 4. Block following some statement:
   3793   #    x = 42;
   3794   #    {};
   3795   #
   3796   # 5. Block at the beginning of a function:
   3797   #    Function(...) {
   3798   #      {};
   3799   #    }
   3800   #
   3801   #    Note that naively checking for the preceding "{" will also match
   3802   #    braces inside multi-dimensional arrays, but this is fine since
   3803   #    that expression will not contain semicolons.
   3804   #
   3805   # 6. Block following another block:
   3806   #    while (true) {}
   3807   #    {};
   3808   #
   3809   # 7. End of namespaces:
   3810   #    namespace {};
   3811   #
    3812     #    These semicolons seem far more common than other kinds of
   3813   #    redundant semicolons, possibly due to people converting classes
   3814   #    to namespaces.  For now we do not warn for this case.
   3815   #
   3816   # Try matching case 1 first.
   3817   match = Match(r'^(.*\)\s*)\{', line)
   3818   if match:
   3819     # Matched closing parenthesis (case 1).  Check the token before the
   3820     # matching opening parenthesis, and don't warn if it looks like a
   3821     # macro.  This avoids these false positives:
   3822     #  - macro that defines a base class
   3823     #  - multi-line macro that defines a base class
   3824     #  - macro that defines the whole class-head
   3825     #
   3826     # But we still issue warnings for macros that we know are safe to
   3827     # warn, specifically:
   3828     #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
   3829     #  - TYPED_TEST
   3830     #  - INTERFACE_DEF
   3831     #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
   3832     #
   3833     # We implement a whitelist of safe macros instead of a blacklist of
   3834     # unsafe macros, even though the latter appears less frequently in
   3835     # google code and would have been easier to implement.  This is because
   3836     # the downside for getting the whitelist wrong means some extra
   3837     # semicolons, while the downside for getting the blacklist wrong
   3838     # would result in compile errors.
   3839     #
   3840     # In addition to macros, we also don't want to warn on
   3841     #  - Compound literals
   3842     #  - Lambdas
   3843     #  - alignas specifier with anonymous structs
   3844     #  - decltype
   3845     closing_brace_pos = match.group(1).rfind(')')
   3846     opening_parenthesis = ReverseCloseExpression(
   3847         clean_lines, linenum, closing_brace_pos)
   3848     if opening_parenthesis[2] > -1:
   3849       line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
   3850       macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix)
   3851       func = Match(r'^(.*\])\s*$', line_prefix)
   3852       if ((macro and
   3853            macro.group(1) not in (
   3854                'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
   3855                'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
   3856                'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
   3857           (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or
   3858           Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or
   3859           Search(r'\bdecltype$', line_prefix) or
   3860           Search(r'\s+=\s*$', line_prefix)):
   3861         match = None
   3862     if (match and
   3863         opening_parenthesis[1] > 1 and
   3864         Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])):
   3865       # Multi-line lambda-expression
   3866       match = None
   3867 
   3868   else:
   3869     # Try matching cases 2-3.
   3870     match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
   3871     if not match:
   3872       # Try matching cases 4-6.  These are always matched on separate lines.
   3873       #
   3874       # Note that we can't simply concatenate the previous line to the
   3875       # current line and do a single match, otherwise we may output
   3876       # duplicate warnings for the blank line case:
   3877       #   if (cond) {
   3878       #     // blank line
   3879       #   }
   3880       prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   3881       if prevline and Search(r'[;{}]\s*$', prevline):
   3882         match = Match(r'^(\s*)\{', line)
   3883 
   3884   # Check matching closing brace
   3885   if match:
   3886     (endline, endlinenum, endpos) = CloseExpression(
   3887         clean_lines, linenum, len(match.group(1)))
   3888     if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
   3889       # Current {} pair is eligible for semicolon check, and we have found
   3890       # the redundant semicolon, output warning here.
   3891       #
   3892       # Note: because we are scanning forward for opening braces, and
   3893       # outputting warnings for the matching closing brace, if there are
   3894       # nested blocks with trailing semicolons, we will get the error
   3895       # messages in reversed order.
   3896 
   3897       # We need to check the line forward for NOLINT
   3898       raw_lines = clean_lines.raw_lines
   3899       ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1,
   3900                               error)
   3901       ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum,
   3902                               error)
   3903 
   3904       error(filename, endlinenum, 'readability/braces', 4,
   3905             "You don't need a ; after a }")
   3906 
   3907 
   3908 def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
   3909   """Look for empty loop/conditional body with only a single semicolon.
   3910 
   3911   Args:
   3912     filename: The name of the current file.
   3913     clean_lines: A CleansedLines instance containing the file.
   3914     linenum: The number of the line to check.
   3915     error: The function to call with any errors found.
   3916   """
   3917 
   3918   # Search for loop keywords at the beginning of the line.  Because only
    3919   # whitespace is allowed before the keywords, this will also ignore most
    3920   # do-while-loops, since those lines should start with a closing brace.
   3921   #
   3922   # We also check "if" blocks here, since an empty conditional block
   3923   # is likely an error.
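           # Illustrative sketch (not exhaustive): lines such as
           #   while (cond);
           #   if (cond);
           # where a semicolon directly follows the closing parenthesis are what
           # the checks below report.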
   3924   line = clean_lines.elided[linenum]
   3925   matched = Match(r'\s*(for|while|if)\s*\(', line)
   3926   if matched:
   3927     # Find the end of the conditional expression.
   3928     (end_line, end_linenum, end_pos) = CloseExpression(
   3929         clean_lines, linenum, line.find('('))
   3930 
   3931     # Output warning if what follows the condition expression is a semicolon.
   3932     # No warning for all other cases, including whitespace or newline, since we
   3933     # have a separate check for semicolons preceded by whitespace.
   3934     if end_pos >= 0 and Match(r';', end_line[end_pos:]):
   3935       if matched.group(1) == 'if':
   3936         error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
   3937               'Empty conditional bodies should use {}')
   3938       else:
   3939         error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
   3940               'Empty loop bodies should use {} or continue')
   3941 
   3942     # Check for if statements that have completely empty bodies (no comments)
   3943     # and no else clauses.
   3944     if end_pos >= 0 and matched.group(1) == 'if':
   3945       # Find the position of the opening { for the if statement.
   3946       # Return without logging an error if it has no brackets.
   3947       opening_linenum = end_linenum
   3948       opening_line_fragment = end_line[end_pos:]
   3949       # Loop until EOF or find anything that's not whitespace or opening {.
   3950       while not Search(r'^\s*\{', opening_line_fragment):
   3951         if Search(r'^(?!\s*$)', opening_line_fragment):
   3952           # Conditional has no brackets.
   3953           return
   3954         opening_linenum += 1
   3955         if opening_linenum == len(clean_lines.elided):
   3956           # Couldn't find conditional's opening { or any code before EOF.
   3957           return
   3958         opening_line_fragment = clean_lines.elided[opening_linenum]
   3959       # Set opening_line (opening_line_fragment may not be entire opening line).
   3960       opening_line = clean_lines.elided[opening_linenum]
   3961 
   3962       # Find the position of the closing }.
   3963       opening_pos = opening_line_fragment.find('{')
   3964       if opening_linenum == end_linenum:
   3965         # We need to make opening_pos relative to the start of the entire line.
   3966         opening_pos += end_pos
   3967       (closing_line, closing_linenum, closing_pos) = CloseExpression(
   3968           clean_lines, opening_linenum, opening_pos)
   3969       if closing_pos < 0:
   3970         return
   3971 
   3972       # Now construct the body of the conditional. This consists of the portion
   3973       # of the opening line after the {, all lines until the closing line,
   3974       # and the portion of the closing line before the }.
   3975       if (clean_lines.raw_lines[opening_linenum] !=
   3976           CleanseComments(clean_lines.raw_lines[opening_linenum])):
   3977         # Opening line ends with a comment, so conditional isn't empty.
   3978         return
   3979       if closing_linenum > opening_linenum:
   3980         # Opening line after the {. Ignore comments here since we checked above.
   3981         body = list(opening_line[opening_pos+1:])
   3982         # All lines until closing line, excluding closing line, with comments.
   3983         body.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum])
   3984         # Closing line before the }. Won't (and can't) have comments.
   3985         body.append(clean_lines.elided[closing_linenum][:closing_pos-1])
   3986         body = '\n'.join(body)
   3987       else:
   3988         # If statement has brackets and fits on a single line.
   3989         body = opening_line[opening_pos+1:closing_pos-1]
   3990 
   3991       # Check if the body is empty
   3992       if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body):
   3993         return
   3994       # The body is empty. Now make sure there's not an else clause.
   3995       current_linenum = closing_linenum
   3996       current_line_fragment = closing_line[closing_pos:]
   3997       # Loop until EOF or find anything that's not whitespace or else clause.
   3998       while Search(r'^\s*$|^(?=\s*else)', current_line_fragment):
   3999         if Search(r'^(?=\s*else)', current_line_fragment):
   4000           # Found an else clause, so don't log an error.
   4001           return
   4002         current_linenum += 1
   4003         if current_linenum == len(clean_lines.elided):
   4004           break
   4005         current_line_fragment = clean_lines.elided[current_linenum]
   4006 
   4007       # The body is empty and there's no else clause until EOF or other code.
   4008       error(filename, end_linenum, 'whitespace/empty_if_body', 4,
   4009             ('If statement had no body and no else clause'))
   4010 
   4011 
   4012 def FindCheckMacro(line):
   4013   """Find a replaceable CHECK-like macro.
   4014 
   4015   Args:
   4016     line: line to search on.
   4017   Returns:
   4018     (macro name, start position), or (None, -1) if no replaceable
   4019     macro is found.
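
             Illustrative example (assumes 'CHECK' is listed in _CHECK_MACROS):
               FindCheckMacro('  CHECK(x);') would return ('CHECK', 7), i.e. the
               macro name and the index of its opening parenthesis.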
   4020   """
   4021   for macro in _CHECK_MACROS:
   4022     i = line.find(macro)
   4023     if i >= 0:
   4024       # Find opening parenthesis.  Do a regular expression match here
   4025       # to make sure that we are matching the expected CHECK macro, as
   4026       # opposed to some other macro that happens to contain the CHECK
   4027       # substring.
   4028       matched = Match(r'^(.*\b' + macro + r'\s*)\(', line)
   4029       if not matched:
   4030         continue
   4031       return (macro, len(matched.group(1)))
   4032   return (None, -1)
   4033 
   4034 
   4035 def CheckCheck(filename, clean_lines, linenum, error):
   4036   """Checks the use of CHECK and EXPECT macros.
   4037 
   4038   Args:
   4039     filename: The name of the current file.
   4040     clean_lines: A CleansedLines instance containing the file.
   4041     linenum: The number of the line to check.
   4042     error: The function to call with any errors found.
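
             Illustration (a sketch; assumes _CHECK_REPLACEMENT['CHECK']['=='] is
             'CHECK_EQ'): a line like 'CHECK(x == 42);' would produce the
             suggestion 'Consider using CHECK_EQ instead of CHECK(a == b)'.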
   4043   """
   4044 
   4045   # Decide the set of replacement macros that should be suggested
   4046   lines = clean_lines.elided
   4047   (check_macro, start_pos) = FindCheckMacro(lines[linenum])
   4048   if not check_macro:
   4049     return
   4050 
   4051   # Find end of the boolean expression by matching parentheses
   4052   (last_line, end_line, end_pos) = CloseExpression(
   4053       clean_lines, linenum, start_pos)
   4054   if end_pos < 0:
   4055     return
   4056 
   4057   # If the check macro is followed by something other than a
   4058   # semicolon, assume users will log their own custom error messages
   4059   # and don't suggest any replacements.
   4060   if not Match(r'\s*;', last_line[end_pos:]):
   4061     return
   4062 
   4063   if linenum == end_line:
   4064     expression = lines[linenum][start_pos + 1:end_pos - 1]
   4065   else:
   4066     expression = lines[linenum][start_pos + 1:]
   4067     for i in xrange(linenum + 1, end_line):
   4068       expression += lines[i]
   4069     expression += last_line[0:end_pos - 1]
   4070 
   4071   # Parse expression so that we can take parentheses into account.
   4072   # This avoids false positives for inputs like "CHECK((a < 4) == b)",
   4073   # which is not replaceable by CHECK_LE.
   4074   lhs = ''
   4075   rhs = ''
   4076   operator = None
   4077   while expression:
   4078     matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
   4079                     r'==|!=|>=|>|<=|<|\()(.*)$', expression)
   4080     if matched:
   4081       token = matched.group(1)
   4082       if token == '(':
   4083         # Parenthesized operand
   4084         expression = matched.group(2)
   4085         (end, _) = FindEndOfExpressionInLine(expression, 0, ['('])
   4086         if end < 0:
   4087           return  # Unmatched parenthesis
   4088         lhs += '(' + expression[0:end]
   4089         expression = expression[end:]
   4090       elif token in ('&&', '||'):
   4091         # Logical and/or operators.  This means the expression
   4092         # contains more than one term, for example:
   4093         #   CHECK(42 < a && a < b);
   4094         #
   4095         # These are not replaceable with CHECK_LE, so bail out early.
   4096         return
   4097       elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
   4098         # Non-relational operator
   4099         lhs += token
   4100         expression = matched.group(2)
   4101       else:
   4102         # Relational operator
   4103         operator = token
   4104         rhs = matched.group(2)
   4105         break
   4106     else:
   4107       # Unparenthesized operand.  Instead of appending to lhs one character
   4108       # at a time, we do another regular expression match to consume several
   4109       # characters at once if possible.  Trivial benchmark shows that this
   4110       # is more efficient when the operands are longer than a single
   4111       # character, which is generally the case.
   4112       matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
   4113       if not matched:
   4114         matched = Match(r'^(\s*\S)(.*)$', expression)
   4115         if not matched:
   4116           break
   4117       lhs += matched.group(1)
   4118       expression = matched.group(2)
   4119 
   4120   # Only apply checks if we got all parts of the boolean expression
   4121   if not (lhs and operator and rhs):
   4122     return
   4123 
    4124   # Check that rhs does not contain logical operators.  We already know
   4125   # that lhs is fine since the loop above parses out && and ||.
   4126   if rhs.find('&&') > -1 or rhs.find('||') > -1:
   4127     return
   4128 
   4129   # At least one of the operands must be a constant literal.  This is
   4130   # to avoid suggesting replacements for unprintable things like
   4131   # CHECK(variable != iterator)
   4132   #
   4133   # The following pattern matches decimal, hex integers, strings, and
   4134   # characters (in that order).
   4135   lhs = lhs.strip()
   4136   rhs = rhs.strip()
   4137   match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
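           # Illustrative matches (a sketch): '42', '-0x1FuL', '"foo"' and "'x'"
           # all match the pattern; an identifier such as 'some_variable' does not.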
   4138   if Match(match_constant, lhs) or Match(match_constant, rhs):
   4139     # Note: since we know both lhs and rhs, we can provide a more
   4140     # descriptive error message like:
   4141     #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
   4142     # Instead of:
   4143     #   Consider using CHECK_EQ instead of CHECK(a == b)
   4144     #
   4145     # We are still keeping the less descriptive message because if lhs
   4146     # or rhs gets long, the error message might become unreadable.
   4147     error(filename, linenum, 'readability/check', 2,
   4148           'Consider using %s instead of %s(a %s b)' % (
   4149               _CHECK_REPLACEMENT[check_macro][operator],
   4150               check_macro, operator))
   4151 
   4152 
   4153 def CheckAltTokens(filename, clean_lines, linenum, error):
   4154   """Check alternative keywords being used in boolean expressions.
   4155 
   4156   Args:
   4157     filename: The name of the current file.
   4158     clean_lines: A CleansedLines instance containing the file.
   4159     linenum: The number of the line to check.
   4160     error: The function to call with any errors found.
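
             Illustration (a sketch; assumes the alternative token 'and' is in
             _ALT_TOKEN_REPLACEMENT, mapping to '&&'): a line like 'if (x and y) {'
             would yield the message 'Use operator && instead of and'.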
   4161   """
   4162   line = clean_lines.elided[linenum]
   4163 
   4164   # Avoid preprocessor lines
   4165   if Match(r'^\s*#', line):
   4166     return
   4167 
   4168   # Last ditch effort to avoid multi-line comments.  This will not help
   4169   # if the comment started before the current line or ended after the
   4170   # current line, but it catches most of the false positives.  At least,
    4171   # it provides a way to work around this warning for people who use
   4172   # multi-line comments in preprocessor macros.
   4173   #
   4174   # TODO(unknown): remove this once cpplint has better support for
   4175   # multi-line comments.
   4176   if line.find('/*') >= 0 or line.find('*/') >= 0:
   4177     return
   4178 
   4179   for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
   4180     error(filename, linenum, 'readability/alt_tokens', 2,
   4181           'Use operator %s instead of %s' % (
   4182               _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
   4183 
   4184 
   4185 def GetLineWidth(line):
   4186   """Determines the width of the line in column positions.
   4187 
   4188   Args:
   4189     line: A string, which may be a Unicode string.
   4190 
   4191   Returns:
   4192     The width of the line in column positions, accounting for Unicode
   4193     combining characters and wide characters.
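
             Illustration (a sketch): a fullwidth CJK character such as u'\u4e2d'
             counts as 2 columns and a plain ASCII character counts as 1, so
             GetLineWidth(u'\u4e2d') would be 2 and GetLineWidth('abc') would be 3.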
   4194   """
   4195   if isinstance(line, unicode):
   4196     width = 0
   4197     for uc in unicodedata.normalize('NFC', line):
   4198       if unicodedata.east_asian_width(uc) in ('W', 'F'):
   4199         width += 2
   4200       elif not unicodedata.combining(uc):
   4201         width += 1
   4202     return width
   4203   else:
   4204     return len(line)
   4205 
   4206 
   4207 def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
   4208                error):
   4209   """Checks rules from the 'C++ style rules' section of cppguide.html.
   4210 
   4211   Most of these rules are hard to test (naming, comment style), but we
   4212   do what we can.  In particular we check for 2-space indents, line lengths,
   4213   tab usage, spaces inside code, etc.
   4214 
   4215   Args:
   4216     filename: The name of the current file.
   4217     clean_lines: A CleansedLines instance containing the file.
   4218     linenum: The number of the line to check.
   4219     file_extension: The extension (without the dot) of the filename.
   4220     nesting_state: A NestingState instance which maintains information about
   4221                    the current stack of nested blocks being parsed.
   4222     error: The function to call with any errors found.
   4223   """
   4224 
   4225   # Don't use "elided" lines here, otherwise we can't check commented lines.
   4226   # Don't want to use "raw" either, because we don't want to check inside C++11
    4227   # raw strings.
   4228   raw_lines = clean_lines.lines_without_raw_strings
   4229   line = raw_lines[linenum]
   4230   prev = raw_lines[linenum - 1] if linenum > 0 else ''
   4231 
   4232   if line.find('\t') != -1:
   4233     error(filename, linenum, 'whitespace/tab', 1,
   4234           'Tab found; better to use spaces')
   4235 
   4236   # One or three blank spaces at the beginning of the line is weird; it's
   4237   # hard to reconcile that with 2-space indents.
   4238   # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
   4239   # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
   4240   # if(RLENGTH > 20) complain = 0;
   4241   # if(match($0, " +(error|private|public|protected):")) complain = 0;
   4242   # if(match(prev, "&& *$")) complain = 0;
   4243   # if(match(prev, "\\|\\| *$")) complain = 0;
   4244   # if(match(prev, "[\",=><] *$")) complain = 0;
   4245   # if(match($0, " <<")) complain = 0;
   4246   # if(match(prev, " +for \\(")) complain = 0;
   4247   # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
   4248   scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$'
   4249   classinfo = nesting_state.InnermostClass()
   4250   initial_spaces = 0
   4251   cleansed_line = clean_lines.elided[linenum]
   4252   while initial_spaces < len(line) and line[initial_spaces] == ' ':
   4253     initial_spaces += 1
    4254   # There are certain situations where we allow one space, notably for
    4255   # section labels, and also for lines containing multi-line raw strings.
   4256   # We also don't check for lines that look like continuation lines
   4257   # (of lines ending in double quotes, commas, equals, or angle brackets)
   4258   # because the rules for how to indent those are non-trivial.
   4259   if (not Search(r'[",=><] *$', prev) and
   4260       (initial_spaces == 1 or initial_spaces == 3) and
   4261       not Match(scope_or_label_pattern, cleansed_line) and
   4262       not (clean_lines.raw_lines[linenum] != line and
   4263            Match(r'^\s*""', line))):
   4264     error(filename, linenum, 'whitespace/indent', 3,
   4265           'Weird number of spaces at line-start.  '
   4266           'Are you using a 2-space indent?')
   4267 
   4268   if line and line[-1].isspace():
   4269     error(filename, linenum, 'whitespace/end_of_line', 4,
   4270           'Line ends in whitespace.  Consider deleting these extra spaces.')
   4271 
   4272   # Check if the line is a header guard.
   4273   is_header_guard = False
   4274   if file_extension == 'h':
   4275     cppvar = GetHeaderGuardCPPVariable(filename)
   4276     if (line.startswith('#ifndef %s' % cppvar) or
   4277         line.startswith('#define %s' % cppvar) or
   4278         line.startswith('#endif  // %s' % cppvar)):
   4279       is_header_guard = True
   4280   # #include lines and header guards can be long, since there's no clean way to
   4281   # split them.
   4282   #
   4283   # URLs can be long too.  It's possible to split these, but it makes them
   4284   # harder to cut&paste.
   4285   #
   4286   # The "$Id:...$" comment may also get very long without it being the
    4287   # developer's fault.
   4288   if (not line.startswith('#include') and not is_header_guard and
   4289       not Match(r'^\s*//.*http(s?)://\S*$', line) and
   4290       not Match(r'^\s*//\s*[^\s]*$', line) and
   4291       not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
   4292     line_width = GetLineWidth(line)
   4293     if line_width > _line_length:
   4294       error(filename, linenum, 'whitespace/line_length', 2,
   4295             'Lines should be <= %i characters long' % _line_length)
   4296 
   4297   if (cleansed_line.count(';') > 1 and
   4298       # for loops are allowed two ;'s (and may run over two lines).
   4299       cleansed_line.find('for') == -1 and
   4300       (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
   4301        GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
   4302       # It's ok to have many commands in a switch case that fits in 1 line
   4303       not ((cleansed_line.find('case ') != -1 or
   4304             cleansed_line.find('default:') != -1) and
   4305            cleansed_line.find('break;') != -1)):
   4306     error(filename, linenum, 'whitespace/newline', 0,
   4307           'More than one command on the same line')
   4308 
   4309   # Some more style checks
   4310   CheckBraces(filename, clean_lines, linenum, error)
   4311   CheckTrailingSemicolon(filename, clean_lines, linenum, error)
   4312   CheckEmptyBlockBody(filename, clean_lines, linenum, error)
   4313   CheckAccess(filename, clean_lines, linenum, nesting_state, error)
   4314   CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
   4315   CheckOperatorSpacing(filename, clean_lines, linenum, error)
   4316   CheckParenthesisSpacing(filename, clean_lines, linenum, error)
   4317   CheckCommaSpacing(filename, clean_lines, linenum, error)
   4318   CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error)
   4319   CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
   4320   CheckCheck(filename, clean_lines, linenum, error)
   4321   CheckAltTokens(filename, clean_lines, linenum, error)
   4322   classinfo = nesting_state.InnermostClass()
   4323   if classinfo:
   4324     CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
   4325 
   4326 
   4327 _RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
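         # For illustration (a sketch): on '#include <vector>  // comment' the
         # pattern above captures group(1) == '<' and group(2) == 'vector'.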
   4328 # Matches the first component of a filename delimited by -s and _s. That is:
   4329 #  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
   4330 #  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
   4331 #  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
   4332 #  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
   4333 _RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   4334 
   4335 
   4336 def _DropCommonSuffixes(filename):
   4337   """Drops common suffixes like _test.cc or -inl.h from filename.
   4338 
   4339   For example:
   4340     >>> _DropCommonSuffixes('foo/foo-inl.h')
   4341     'foo/foo'
   4342     >>> _DropCommonSuffixes('foo/bar/foo.cc')
   4343     'foo/bar/foo'
   4344     >>> _DropCommonSuffixes('foo/foo_internal.h')
   4345     'foo/foo'
   4346     >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
   4347     'foo/foo_unusualinternal'
   4348 
   4349   Args:
   4350     filename: The input filename.
   4351 
   4352   Returns:
   4353     The filename with the common suffix removed.
   4354   """
   4355   for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
   4356                  'inl.h', 'impl.h', 'internal.h'):
   4357     if (filename.endswith(suffix) and len(filename) > len(suffix) and
   4358         filename[-len(suffix) - 1] in ('-', '_')):
   4359       return filename[:-len(suffix) - 1]
   4360   return os.path.splitext(filename)[0]
   4361 
   4362 
   4363 def _ClassifyInclude(fileinfo, include, is_system):
   4364   """Figures out what kind of header 'include' is.
   4365 
   4366   Args:
   4367     fileinfo: The current file cpplint is running over. A FileInfo instance.
   4368     include: The path to a #included file.
   4369     is_system: True if the #include used <> rather than "".
   4370 
   4371   Returns:
   4372     One of the _XXX_HEADER constants.
   4373 
   4374   For example:
   4375     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
   4376     _C_SYS_HEADER
   4377     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
   4378     _CPP_SYS_HEADER
   4379     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
   4380     _LIKELY_MY_HEADER
   4381     >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
   4382     ...                  'bar/foo_other_ext.h', False)
   4383     _POSSIBLE_MY_HEADER
   4384     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
   4385     _OTHER_HEADER
   4386   """
   4387   # This is a list of all standard c++ header files, except
   4388   # those already checked for above.
   4389   is_cpp_h = include in _CPP_HEADERS
   4390 
   4391   if is_system:
   4392     if is_cpp_h:
   4393       return _CPP_SYS_HEADER
   4394     else:
   4395       return _C_SYS_HEADER
   4396 
   4397   # If the target file and the include we're checking share a
   4398   # basename when we drop common extensions, and the include
    4399   # lives in '.', then it's likely to be owned by the target file.
   4400   target_dir, target_base = (
   4401       os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
   4402   include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
   4403   if target_base == include_base and (
   4404       include_dir == target_dir or
   4405       include_dir == os.path.normpath(target_dir + '/../public')):
   4406     return _LIKELY_MY_HEADER
   4407 
   4408   # If the target and include share some initial basename
   4409   # component, it's possible the target is implementing the
   4410   # include, so it's allowed to be first, but we'll never
   4411   # complain if it's not there.
   4412   target_first_component = _RE_FIRST_COMPONENT.match(target_base)
   4413   include_first_component = _RE_FIRST_COMPONENT.match(include_base)
   4414   if (target_first_component and include_first_component and
   4415       target_first_component.group(0) ==
   4416       include_first_component.group(0)):
   4417     return _POSSIBLE_MY_HEADER
   4418 
   4419   return _OTHER_HEADER
   4420 
   4421 
   4422 
   4423 def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
   4424   """Check rules that are applicable to #include lines.
   4425 
    4426   Strings on #include lines are NOT removed from the elided line, to make
   4427   certain tasks easier. However, to prevent false positives, checks
   4428   applicable to #include lines in CheckLanguage must be put here.
   4429 
   4430   Args:
   4431     filename: The name of the current file.
   4432     clean_lines: A CleansedLines instance containing the file.
   4433     linenum: The number of the line to check.
   4434     include_state: An _IncludeState instance in which the headers are inserted.
   4435     error: The function to call with any errors found.
   4436   """
   4437   fileinfo = FileInfo(filename)
   4438   line = clean_lines.lines[linenum]
   4439 
   4440   # "include" should use the new style "foo/bar.h" instead of just "bar.h"
   4441   # Only do this check if the included header follows google naming
   4442   # conventions.  If not, assume that it's a 3rd party API that
   4443   # requires special include conventions.
   4444   #
   4445   # We also make an exception for Lua headers, which follow google
   4446   # naming convention but not the include convention.
   4447   match = Match(r'#include\s*"([^/]+\.h)"', line)
   4448   if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)):
   4449     error(filename, linenum, 'build/include', 4,
   4450           'Include the directory when naming .h files')
   4451 
    4452   # We shouldn't include a file more than once.  Actually, there are a
   4453   # handful of instances where doing so is okay, but in general it's
   4454   # not.
   4455   match = _RE_PATTERN_INCLUDE.search(line)
   4456   if match:
   4457     include = match.group(2)
   4458     is_system = (match.group(1) == '<')
   4459     duplicate_line = include_state.FindHeader(include)
   4460     if duplicate_line >= 0:
   4461       error(filename, linenum, 'build/include', 4,
   4462             '"%s" already included at %s:%s' %
   4463             (include, filename, duplicate_line))
   4464     elif (include.endswith('.cc') and
   4465           os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)):
   4466       error(filename, linenum, 'build/include', 4,
   4467             'Do not include .cc files from other packages')
   4468     elif not _THIRD_PARTY_HEADERS_PATTERN.match(include):
   4469       include_state.include_list[-1].append((include, linenum))
   4470 
   4471       # We want to ensure that headers appear in the right order:
   4472       # 1) for foo.cc, foo.h  (preferred location)
   4473       # 2) c system files
   4474       # 3) cpp system files
   4475       # 4) for foo.cc, foo.h  (deprecated location)
   4476       # 5) other google headers
   4477       #
   4478       # We classify each include statement as one of those 5 types
   4479       # using a number of techniques. The include_state object keeps
   4480       # track of the highest type seen, and complains if we see a
   4481       # lower type after that.
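               # For illustration (a sketch): in foo.cc the preferred order is
               #   #include "foo/foo.h", then <stdio.h>, then <string>, then
               #   other project headers such as "bar/bar.h".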
   4482       error_message = include_state.CheckNextIncludeOrder(
   4483           _ClassifyInclude(fileinfo, include, is_system))
   4484       if error_message:
   4485         error(filename, linenum, 'build/include_order', 4,
   4486               '%s. Should be: %s.h, c system, c++ system, other.' %
   4487               (error_message, fileinfo.BaseName()))
   4488       canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
   4489       if not include_state.IsInAlphabeticalOrder(
   4490           clean_lines, linenum, canonical_include):
   4491         error(filename, linenum, 'build/include_alpha', 4,
   4492               'Include "%s" not in alphabetical order' % include)
   4493       include_state.SetLastHeader(canonical_include)
   4494 
   4495 
   4496 
   4497 def _GetTextInside(text, start_pattern):
   4498   r"""Retrieves all the text between matching open and close parentheses.
   4499 
   4500   Given a string of lines and a regular expression string, retrieve all the text
   4501   following the expression and between opening punctuation symbols like
    4502   (, [, or {, and the matching close-punctuation symbol. This properly handles
    4503   nested occurrences of the punctuation, so for text like
   4504     printf(a(), b(c()));
   4505   a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
    4506   start_pattern must match a string that ends with an opening punctuation symbol.
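           For example (illustrative, mirroring the case above):
             >>> _GetTextInside('printf(a(), b(c()));', r'printf\(')
             'a(), b(c())'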
   4507 
   4508   Args:
    4509     text: The text to extract from. Its comments and strings must be elided.
    4510            It can be a single line or span multiple lines.
   4511     start_pattern: The regexp string indicating where to start extracting
   4512                    the text.
   4513   Returns:
   4514     The extracted text.
   4515     None if either the opening string or ending punctuation could not be found.
   4516   """
   4517   # TODO(unknown): Audit cpplint.py to see what places could be profitably
    4518   # rewritten to use _GetTextInside (and currently use inferior regexp matching).
   4519 
    4520   # Map each opening punctuation symbol to its matching closing symbol.
   4521   matching_punctuation = {'(': ')', '{': '}', '[': ']'}
   4522   closing_punctuation = set(matching_punctuation.itervalues())
   4523 
   4524   # Find the position to start extracting text.
   4525   match = re.search(start_pattern, text, re.M)
   4526   if not match:  # start_pattern not found in text.
   4527     return None
   4528   start_position = match.end(0)
   4529 
   4530   assert start_position > 0, (
    4531       'start_pattern must end with an opening punctuation.')
   4532   assert text[start_position - 1] in matching_punctuation, (
    4533       'start_pattern must end with an opening punctuation.')
   4534   # Stack of closing punctuations we expect to have in text after position.
   4535   punctuation_stack = [matching_punctuation[text[start_position - 1]]]
   4536   position = start_position
   4537   while punctuation_stack and position < len(text):
   4538     if text[position] == punctuation_stack[-1]:
   4539       punctuation_stack.pop()
   4540     elif text[position] in closing_punctuation:
   4541       # A closing punctuation without matching opening punctuations.
   4542       return None
   4543     elif text[position] in matching_punctuation:
   4544       punctuation_stack.append(matching_punctuation[text[position]])
   4545     position += 1
   4546   if punctuation_stack:
   4547     # Opening punctuations left without matching close-punctuations.
   4548     return None
    4549   # All punctuation matched.
   4550   return text[start_position:position - 1]
   4551 
   4552 
   4553 # Patterns for matching call-by-reference parameters.
   4554 #
   4555 # Supports nested templates up to 2 levels deep using this messy pattern:
   4556 #   < (?: < (?: < [^<>]*
   4557 #               >
   4558 #           |   [^<>] )*
   4559 #         >
   4560 #     |   [^<>] )*
   4561 #   >
   4562 _RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
   4563 _RE_PATTERN_TYPE = (
   4564     r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
   4565     r'(?:\w|'
   4566     r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
   4567     r'::)+')
   4568 # A call-by-reference parameter ends with '& identifier'.
   4569 _RE_PATTERN_REF_PARAM = re.compile(
   4570     r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
   4571     r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
   4572 # A call-by-const-reference parameter either ends with 'const& identifier'
   4573 # or looks like 'const type& identifier' when 'type' is atomic.
   4574 _RE_PATTERN_CONST_REF_PARAM = (
   4575     r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
   4576     r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')
   4577 # Stream types.
   4578 _RE_PATTERN_REF_STREAM_PARAM = (
   4579     r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')')
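         # For illustration (a sketch): in a declaration such as
         #   void Update(string &output, const string &input, ostream &stream);
         # the text 'string &output' matches _RE_PATTERN_REF_PARAM, the text
         # 'const string &input' matches _RE_PATTERN_CONST_REF_PARAM, and
         # 'ostream &stream' matches _RE_PATTERN_REF_STREAM_PARAM.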
   4580 
   4581 
   4582 def CheckLanguage(filename, clean_lines, linenum, file_extension,
   4583                   include_state, nesting_state, error):
   4584   """Checks rules from the 'C++ language rules' section of cppguide.html.
   4585 
   4586   Some of these rules are hard to test (function overloading, using
   4587   uint32 inappropriately), but we do the best we can.
   4588 
   4589   Args:
   4590     filename: The name of the current file.
   4591     clean_lines: A CleansedLines instance containing the file.
   4592     linenum: The number of the line to check.
   4593     file_extension: The extension (without the dot) of the filename.
   4594     include_state: An _IncludeState instance in which the headers are inserted.
   4595     nesting_state: A NestingState instance which maintains information about
   4596                    the current stack of nested blocks being parsed.
   4597     error: The function to call with any errors found.
   4598   """
    4599   # If the line is empty or consists entirely of a comment, no need to
   4600   # check it.
   4601   line = clean_lines.elided[linenum]
   4602   if not line:
   4603     return
   4604 
   4605   match = _RE_PATTERN_INCLUDE.search(line)
   4606   if match:
   4607     CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
   4608     return
   4609 
   4610   # Reset include state across preprocessor directives.  This is meant
   4611   # to silence warnings for conditional includes.
   4612   match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
   4613   if match:
   4614     include_state.ResetSection(match.group(1))
   4615 
   4616   # Make Windows paths like Unix.
   4617   fullname = os.path.abspath(filename).replace('\\', '/')
   4618 
   4619   # Perform other checks now that we are sure that this is not an include line
   4620   CheckCasts(filename, clean_lines, linenum, error)
   4621   CheckGlobalStatic(filename, clean_lines, linenum, error)
   4622   CheckPrintf(filename, clean_lines, linenum, error)
   4623 
   4624   if file_extension == 'h':
   4625     # TODO(unknown): check that 1-arg constructors are explicit.
   4626     #                How to tell it's a constructor?
   4627     #                (handled in CheckForNonStandardConstructs for now)
   4628     # TODO(unknown): check that classes declare or disable copy/assign
   4629     #                (level 1 error)
   4630     pass
   4631 
   4632   # Check if people are using the verboten C basic types.  The only exception
   4633   # we regularly allow is "unsigned short port" for port.
   4634   if Search(r'\bshort port\b', line):
   4635     if not Search(r'\bunsigned short port\b', line):
   4636       error(filename, linenum, 'runtime/int', 4,
   4637             'Use "unsigned short" for ports, not "short"')
   4638   else:
   4639     match = Search(r'\b(short|long(?! +double)|long long)\b', line)
   4640     if match:
   4641       error(filename, linenum, 'runtime/int', 4,
   4642             'Use int16/int64/etc, rather than the C type %s' % match.group(1))
   4643 
   4644   # Check if some verboten operator overloading is going on
   4645   # TODO(unknown): catch out-of-line unary operator&:
   4646   #   class X {};
   4647   #   int operator&(const X& x) { return 42; }  // unary operator&
   4648   # The trick is it's hard to tell apart from binary operator&:
   4649   #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
   4650   if Search(r'\boperator\s*&\s*\(\s*\)', line):
   4651     error(filename, linenum, 'runtime/operator', 4,
   4652           'Unary operator& is dangerous.  Do not use it.')
   4653 
   4654   # Check for suspicious usage of "if" like
   4655   # } if (a == b) {
   4656   if Search(r'\}\s*if\s*\(', line):
   4657     error(filename, linenum, 'readability/braces', 4,
   4658           'Did you mean "else if"? If not, start a new line for "if".')
   4659 
   4660   # Check for potential format string bugs like printf(foo).
   4661   # We constrain the pattern not to pick things like DocidForPrintf(foo).
   4662   # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   4663   # TODO(unknown): Catch the following case. Need to change the calling
    4664   # convention of the whole function to process multiple lines to handle it.
   4665   #   printf(
   4666   #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
   4667   printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
   4668   if printf_args:
   4669     match = Match(r'([\w.\->()]+)$', printf_args)
   4670     if match and match.group(1) != '__VA_ARGS__':
   4671       function_name = re.search(r'\b((?:string)?printf)\s*\(',
   4672                                 line, re.I).group(1)
   4673       error(filename, linenum, 'runtime/printf', 4,
   4674             'Potential format string bug. Do %s("%%s", %s) instead.'
   4675             % (function_name, match.group(1)))
   4676 
   4677   # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   4678   match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   4679   if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
   4680     error(filename, linenum, 'runtime/memset', 4,
   4681           'Did you mean "memset(%s, 0, %s)"?'
   4682           % (match.group(1), match.group(2)))
   4683 
   4684   if Search(r'\busing namespace\b', line):
   4685     error(filename, linenum, 'build/namespaces', 5,
   4686           'Do not use namespace using-directives.  '
   4687           'Use using-declarations instead.')
   4688 
   4689   # Detect variable-length arrays.
   4690   match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   4691   if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
   4692       match.group(3).find(']') == -1):
   4693     # Split the size using space and arithmetic operators as delimiters.
   4694     # If any of the resulting tokens are not compile time constants then
   4695     # report the error.
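             # Illustrative sketch: 'int buf[n * 2];' (with a non-constant n) is
             # flagged below, while 'int buf[kMaxSize * 2];' and
             # 'char buf[sizeof(Foo)];' are not.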
    4696     tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
   4697     is_const = True
   4698     skip_next = False
   4699     for tok in tokens:
   4700       if skip_next:
   4701         skip_next = False
   4702         continue
   4703 
   4704       if Search(r'sizeof\(.+\)', tok): continue
   4705       if Search(r'arraysize\(\w+\)', tok): continue
   4706 
   4707       tok = tok.lstrip('(')
   4708       tok = tok.rstrip(')')
   4709       if not tok: continue
   4710       if Match(r'\d+', tok): continue
   4711       if Match(r'0[xX][0-9a-fA-F]+', tok): continue
   4712       if Match(r'k[A-Z0-9]\w*', tok): continue
   4713       if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
   4714       if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
    4715       # A catch-all for tricky sizeof cases such as 'sizeof expression',
    4716       # 'sizeof(*type)', 'sizeof(const type)', and 'sizeof(struct StructName)';
    4717       # these require skipping the next token because we split on ' ' and '*'.
   4718       if tok.startswith('sizeof'):
   4719         skip_next = True
   4720         continue
   4721       is_const = False
   4722       break
   4723     if not is_const:
   4724       error(filename, linenum, 'runtime/arrays', 1,
   4725             'Do not use variable-length arrays.  Use an appropriately named '
   4726             "('k' followed by CamelCase) compile-time constant for the size.")
   4727 
   4728   # Check for use of unnamed namespaces in header files.  Registration
   4729   # macros are typically OK, so we allow use of "namespace {" on lines
   4730   # that end with backslashes.
   4731   if (file_extension == 'h'
   4732       and Search(r'\bnamespace\s*{', line)
   4733       and line[-1] != '\\'):
   4734     error(filename, linenum, 'build/namespaces', 4,
   4735           'Do not use unnamed namespaces in header files.  See '
   4736           'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   4737           ' for more information.')
   4738 
   4739 
   4740 def CheckGlobalStatic(filename, clean_lines, linenum, error):
   4741   """Check for unsafe global or static objects.
   4742 
   4743   Args:
   4744     filename: The name of the current file.
   4745     clean_lines: A CleansedLines instance containing the file.
   4746     linenum: The number of the line to check.
   4747     error: The function to call with any errors found.
   4748   """
   4749   line = clean_lines.elided[linenum]
   4750 
   4751   # Match two lines at a time to support multiline declarations
   4752   if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line):
   4753     line += clean_lines.elided[linenum + 1].strip()
   4754 
   4755   # Check for people declaring static/global STL strings at the top level.
   4756   # This is dangerous because the C++ language does not guarantee that
   4757   # globals with constructors are initialized before the first access, and
   4758   # also because globals can be destroyed when some threads are still running.
   4759   # TODO(unknown): Generalize this to also find static unique_ptr instances.
   4760   # TODO(unknown): File bugs for clang-tidy to find these.
   4761   match = Match(
   4762       r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +'
   4763       r'([a-zA-Z0-9_:]+)\b(.*)',
   4764       line)
   4765 
   4766   # Remove false positives:
   4767   # - String pointers (as opposed to values).
   4768   #    string *pointer
   4769   #    const string *pointer
   4770   #    string const *pointer
   4771   #    string *const pointer
   4772   #
   4773   # - Functions and template specializations.
   4774   #    string Function<Type>(...
   4775   #    string Class<Type>::Method(...
   4776   #
   4777   # - Operators.  These are matched separately because operator names
   4778   #   cross non-word boundaries, and trying to match both operators
   4779   #   and functions at the same time would decrease accuracy of
   4780   #   matching identifiers.
   4781   #    string Class::operator*()
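           # Illustrative sketch of a reported case: a top-level line like
           #   static const string kPrefix = "foo";
           # would trigger the 'use a C style string' message below, suggesting
           # 'static const char kPrefix[]'.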
   4782   if (match and
   4783       not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and
   4784       not Search(r'\boperator\W', line) and
   4785       not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))):
   4786     if Search(r'\bconst\b', line):
   4787       error(filename, linenum, 'runtime/string', 4,
   4788             'For a static/global string constant, use a C style string '
   4789             'instead: "%schar%s %s[]".' %
   4790             (match.group(1), match.group(2) or '', match.group(3)))
   4791     else:
   4792       error(filename, linenum, 'runtime/string', 4,
   4793             'Static/global string variables are not permitted.')
   4794 
   4795   if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or
   4796       Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)):
   4797     error(filename, linenum, 'runtime/init', 4,
   4798           'You seem to be initializing a member variable with itself.')
   4799 
   4800 
   4801 def CheckPrintf(filename, clean_lines, linenum, error):
   4802   """Check for printf related issues.
   4803 
   4804   Args:
   4805     filename: The name of the current file.
   4806     clean_lines: A CleansedLines instance containing the file.
   4807     linenum: The number of the line to check.
   4808     error: The function to call with any errors found.
   4809   """
   4810   line = clean_lines.elided[linenum]
   4811 
   4812   # When snprintf is used, the second argument shouldn't be a literal.
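           # Illustrative sketch: 'snprintf(buf, 1024, "%s", foo)' draws a
           # suggestion to use sizeof(buf) as the 2nd arg, while
           # 'snprintf(NULL, 0, ...)' (size calculation) does not.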
   4813   match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   4814   if match and match.group(2) != '0':
   4815     # If 2nd arg is zero, snprintf is used to calculate size.
   4816     error(filename, linenum, 'runtime/printf', 3,
   4817           'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   4818           'to snprintf.' % (match.group(1), match.group(2)))
   4819 
   4820   # Check if some verboten C functions are being used.
   4821   if Search(r'\bsprintf\s*\(', line):
   4822     error(filename, linenum, 'runtime/printf', 5,
   4823           'Never use sprintf. Use snprintf instead.')
   4824   match = Search(r'\b(strcpy|strcat)\s*\(', line)
   4825   if match:
   4826     error(filename, linenum, 'runtime/printf', 4,
   4827           'Almost always, snprintf is better than %s' % match.group(1))
   4828 
   4829 
   4830 def IsDerivedFunction(clean_lines, linenum):
   4831   """Check if current line contains an inherited function.
   4832 
   4833   Args:
   4834     clean_lines: A CleansedLines instance containing the file.
   4835     linenum: The number of the line to check.
   4836   Returns:
   4837     True if current line contains a function with "override"
   4838     virt-specifier.
   4839   """
   4840   # Scan back a few lines for start of current function
   4841   for i in xrange(linenum, max(-1, linenum - 10), -1):
   4842     match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i])
   4843     if match:
   4844       # Look for "override" after the matching closing parenthesis
   4845       line, _, closing_paren = CloseExpression(
   4846           clean_lines, i, len(match.group(1)))
   4847       return (closing_paren >= 0 and
   4848               Search(r'\boverride\b', line[closing_paren:]))
   4849   return False
   4850 
   4851 
   4852 def IsOutOfLineMethodDefinition(clean_lines, linenum):
   4853   """Check if current line contains an out-of-line method definition.
   4854 
   4855   Args:
   4856     clean_lines: A CleansedLines instance containing the file.
   4857     linenum: The number of the line to check.
   4858   Returns:
   4859     True if current line contains an out-of-line method definition.
   4860   """
   4861   # Scan back a few lines for start of current function
   4862   for i in xrange(linenum, max(-1, linenum - 10), -1):
   4863     if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]):
   4864       return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None
   4865   return False
   4866 
   4867 
   4868 def IsInitializerList(clean_lines, linenum):
   4869   """Check if current line is inside constructor initializer list.
   4870 
   4871   Args:
   4872     clean_lines: A CleansedLines instance containing the file.
   4873     linenum: The number of the line to check.
   4874   Returns:
   4875     True if current line appears to be inside constructor initializer
   4876     list, False otherwise.
   4877   """
   4878   for i in xrange(linenum, 1, -1):
   4879     line = clean_lines.elided[i]
   4880     if i == linenum:
   4881       remove_function_body = Match(r'^(.*)\{\s*$', line)
   4882       if remove_function_body:
   4883         line = remove_function_body.group(1)
   4884 
   4885     if Search(r'\s:\s*\w+[({]', line):
    4886       # A lone colon tends to indicate the start of a constructor
    4887       # initializer list.  It could also be a ternary operator, which
    4888       # also tends to appear in constructor initializer lists as
   4889       # opposed to parameter lists.
   4890       return True
   4891     if Search(r'\}\s*,\s*$', line):
   4892       # A closing brace followed by a comma is probably the end of a
   4893       # brace-initialized member in constructor initializer list.
   4894       return True
   4895     if Search(r'[{};]\s*$', line):
   4896       # Found one of the following:
   4897       # - A closing brace or semicolon, probably the end of the previous
   4898       #   function.
   4899       # - An opening brace, probably the start of current class or namespace.
   4900       #
   4901       # Current line is probably not inside an initializer list since
   4902       # we saw one of those things without seeing the starting colon.
   4903       return False
   4904 
   4905   # Got to the beginning of the file without seeing the start of
   4906   # constructor initializer list.
   4907   return False
   4908 
   4909 
   4910 def CheckForNonConstReference(filename, clean_lines, linenum,
   4911                               nesting_state, error):
   4912   """Check for non-const references.
   4913 
   4914   Separate from CheckLanguage since it scans backwards from current
   4915   line, instead of scanning forward.
   4916 
   4917   Args:
   4918     filename: The name of the current file.
   4919     clean_lines: A CleansedLines instance containing the file.
   4920     linenum: The number of the line to check.
   4921     nesting_state: A NestingState instance which maintains information about
   4922                    the current stack of nested blocks being parsed.
   4923     error: The function to call with any errors found.
   4924   """
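          # Illustrative example of what this check ultimately flags (assumed):
          #   void Frobnicate(string &out);        // non-const reference
          # versus the forms it accepts:
          #   void Frobnicate(const string &in);   // const reference
          #   void Frobnicate(string *out);        // pointer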
   4925   # Do nothing if there is no '&' on current line.
   4926   line = clean_lines.elided[linenum]
   4927   if '&' not in line:
   4928     return
   4929 
   4930   # If a function is inherited, current function doesn't have much of
   4931   # a choice, so any non-const references should not be blamed on
   4932   # derived function.
   4933   if IsDerivedFunction(clean_lines, linenum):
   4934     return
   4935 
   4936   # Don't warn on out-of-line method definitions, as we would warn on the
   4937   # in-line declaration, if it isn't marked with 'override'.
   4938   if IsOutOfLineMethodDefinition(clean_lines, linenum):
   4939     return
   4940 
   4941   # Long type names may be broken across multiple lines, usually in one
   4942   # of these forms:
   4943   #   LongType
   4944   #       ::LongTypeContinued &identifier
   4945   #   LongType::
   4946   #       LongTypeContinued &identifier
   4947   #   LongType<
   4948   #       ...>::LongTypeContinued &identifier
   4949   #
   4950   # If we detected a type split across two lines, join the previous
   4951   # line to current line so that we can match const references
   4952   # accordingly.
   4953   #
   4954   # Note that this only scans back one line, since scanning back
   4955   # arbitrary number of lines would be expensive.  If you have a type
   4956   # that spans more than 2 lines, please use a typedef.
   4957   if linenum > 1:
   4958     previous = None
   4959     if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
   4960       # previous_line\n + ::current_line
   4961       previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
   4962                         clean_lines.elided[linenum - 1])
   4963     elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
   4964       # previous_line::\n + current_line
   4965       previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
   4966                         clean_lines.elided[linenum - 1])
   4967     if previous:
   4968       line = previous.group(1) + line.lstrip()
   4969     else:
   4970       # Check for templated parameter that is split across multiple lines
   4971       endpos = line.rfind('>')
   4972       if endpos > -1:
   4973         (_, startline, startpos) = ReverseCloseExpression(
   4974             clean_lines, linenum, endpos)
   4975         if startpos > -1 and startline < linenum:
   4976           # Found the matching < on an earlier line, collect all
   4977           # pieces up to current line.
   4978           line = ''
   4979           for i in xrange(startline, linenum + 1):
   4980             line += clean_lines.elided[i].strip()
   4981 
   4982   # Check for non-const references in function parameters.  A single '&' may
   4983   # be found in the following places:
   4984   #   inside expression: binary & for bitwise AND
   4985   #   inside expression: unary & for taking the address of something
   4986   #   inside declarators: reference parameter
   4987   # We will exclude the first two cases by checking that we are not inside a
   4988   # function body, including one that was just introduced by a trailing '{'.
   4989   # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
   4990   if (nesting_state.previous_stack_top and
   4991       not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
   4992            isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
   4993     # Not at toplevel, not within a class, and not within a namespace
   4994     return
   4995 
   4996   # Avoid initializer lists.  We only need to scan back from the
   4997   # current line for something that starts with ':'.
   4998   #
   4999   # We don't need to check the current line, since the '&' would
   5000   # appear inside the second set of parentheses on the current line as
   5001   # opposed to the first set.
   5002   if linenum > 0:
   5003     for i in xrange(linenum - 1, max(0, linenum - 10), -1):
   5004       previous_line = clean_lines.elided[i]
   5005       if not Search(r'[),]\s*$', previous_line):
   5006         break
   5007       if Match(r'^\s*:\s+\S', previous_line):
   5008         return
   5009 
   5010   # Avoid preprocessors
   5011   if Search(r'\\\s*$', line):
   5012     return
   5013 
   5014   # Avoid constructor initializer lists
   5015   if IsInitializerList(clean_lines, linenum):
   5016     return
   5017 
   5018   # We allow non-const references in a few standard places, like functions
   5019   # called "swap()" or iostream operators like "<<" or ">>".  Do not check
   5020   # those function parameters.
   5021   #
   5022   # We also accept & in static_assert, which looks like a function but
   5023   # it's actually a declaration expression.
   5024   whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
   5025                            r'operator\s*[<>][<>]|'
   5026                            r'static_assert|COMPILE_ASSERT'
   5027                            r')\s*\(')
   5028   if Search(whitelisted_functions, line):
   5029     return
   5030   elif not Search(r'\S+\([^)]*$', line):
   5031     # We didn't see a whitelisted function on this line.  In fact, we
   5032     # didn't see any function name on this line at all, so this is likely
   5033     # a multi-line parameter list.  Try a bit harder to catch this case.
   5034     for i in xrange(2):
   5035       if (linenum > i and
   5036           Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
   5037         return
   5038 
   5039   decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
   5040   for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
   5041     if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and
   5042         not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)):
   5043       error(filename, linenum, 'runtime/references', 2,
   5044             'Is this a non-const reference? '
   5045             'If so, make const or use a pointer: ' +
   5046             ReplaceAll(' *<', '<', parameter))
   5047 
   5048 
   5049 def CheckCasts(filename, clean_lines, linenum, error):
   5050   """Various cast related checks.
   5051 
   5052   Args:
   5053     filename: The name of the current file.
   5054     clean_lines: A CleansedLines instance containing the file.
   5055     linenum: The number of the line to check.
   5056     error: The function to call with any errors found.
   5057   """
   5058   line = clean_lines.elided[linenum]
   5059 
   5060   # Check to see if they're using a conversion function cast.
   5061   # I just try to capture the most common basic types, though there are more.
   5062   # Parameterless conversion functions, such as bool(), are allowed as they are
   5063   # probably a member operator declaration or default constructor.
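          # Illustrative examples (assumed, not exhaustive):
          #   int(2.2)       // flagged below: prefer static_cast<int>(2.2)
          #   new int(42)    // not flagged: operator new, not a cast
          #   bool()         // not flagged: parameterless, likely a declaration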
   5064   match = Search(
   5065       r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b'
   5066       r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
   5067       r'(\([^)].*)', line)
   5068   expecting_function = ExpectingFunctionArgs(clean_lines, linenum)
   5069   if match and not expecting_function:
   5070     matched_type = match.group(2)
   5071 
   5072     # matched_new_or_template is used to silence two false positives:
   5073     # - New operators
   5074     # - Template arguments with function types
   5075     #
   5076     # For template arguments, we match on types immediately following
   5077     # an opening bracket without any spaces.  This is a fast way to
   5078     # silence the common case where the function type is the first
   5079     # template argument.  False negative with less-than comparison is
   5080     # avoided because those operators are usually followed by a space.
   5081     #
   5082     #   function<double(double)>   // bracket + no space = false positive
   5083     #   value < double(42)         // bracket + space = true positive
   5084     matched_new_or_template = match.group(1)
   5085 
   5086     # Avoid arrays by looking for brackets that come after the closing
   5087     # parenthesis.
   5088     if Match(r'\([^()]+\)\s*\[', match.group(3)):
   5089       return
   5090 
   5091     # Other things to ignore:
   5092     # - Function pointers
   5093     # - Casts to pointer types
   5094     # - Placement new
   5095     # - Alias declarations
   5096     matched_funcptr = match.group(3)
   5097     if (matched_new_or_template is None and
   5098         not (matched_funcptr and
   5099              (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
   5100                     matched_funcptr) or
   5101               matched_funcptr.startswith('(*)'))) and
   5102         not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and
   5103         not Search(r'new\(\S+\)\s*' + matched_type, line)):
   5104       error(filename, linenum, 'readability/casting', 4,
   5105             'Using deprecated casting style.  '
   5106             'Use static_cast<%s>(...) instead' %
   5107             matched_type)
   5108 
   5109   if not expecting_function:
   5110     CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
   5111                     r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
   5112 
   5113   # This doesn't catch all cases. Consider (const char * const)"hello".
   5114   #
   5115   # (char *) "foo" should always be a const_cast (reinterpret_cast won't
   5116   # compile).
   5117   if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast',
   5118                      r'\((char\s?\*+\s?)\)\s*"', error):
   5119     pass
   5120   else:
   5121     # Check pointer casts for other than string constants
   5122     CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
   5123                     r'\((\w+\s?\*+\s?)\)', error)
   5124 
   5125   # In addition, we look for people taking the address of a cast.  This
   5126   # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   5127   # point where you think.
   5128   #
   5129   # Some non-identifier character is required before the '&' for the
   5130   # expression to be recognized as a cast.  These are casts:
   5131   #   expression = &static_cast<int*>(temporary());
   5132   #   function(&(int*)(temporary()));
   5133   #
   5134   # This is not a cast:
   5135   #   reference_type&(int* function_param);
   5136   match = Search(
   5137       r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|'
   5138       r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line)
   5139   if match:
   5140     # Try a better error message when the & is bound to something
   5141     # dereferenced by the casted pointer, as opposed to the casted
   5142     # pointer itself.
   5143     parenthesis_error = False
   5144     match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line)
   5145     if match:
   5146       _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1)))
   5147       if x1 >= 0 and clean_lines.elided[y1][x1] == '(':
   5148         _, y2, x2 = CloseExpression(clean_lines, y1, x1)
   5149         if x2 >= 0:
   5150           extended_line = clean_lines.elided[y2][x2:]
   5151           if y2 < clean_lines.NumLines() - 1:
   5152             extended_line += clean_lines.elided[y2 + 1]
   5153           if Match(r'\s*(?:->|\[)', extended_line):
   5154             parenthesis_error = True
   5155 
   5156     if parenthesis_error:
   5157       error(filename, linenum, 'readability/casting', 4,
   5158             ('Are you taking an address of something dereferenced '
   5159              'from a cast?  Wrapping the dereferenced expression in '
   5160              'parentheses will make the binding more obvious'))
   5161     else:
   5162       error(filename, linenum, 'runtime/casting', 4,
   5163             ('Are you taking an address of a cast?  '
   5164              'This is dangerous: could be a temp var.  '
   5165              'Take the address before doing the cast, rather than after'))
   5166 
   5167 
   5168 def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
   5169   """Checks for a C-style cast by looking for the pattern.
   5170 
   5171   Args:
   5172     filename: The name of the current file.
   5173     clean_lines: A CleansedLines instance containing the file.
   5174     linenum: The number of the line to check.
   5175     cast_type: The string for the C++ cast to recommend.  This is either
   5176       reinterpret_cast, static_cast, or const_cast, depending.
   5177     pattern: The regular expression used to find C-style casts.
   5178     error: The function to call with any errors found.
   5179 
   5180   Returns:
   5181     True if an error was emitted.
   5182     False otherwise.
   5183   """
   5184   line = clean_lines.elided[linenum]
   5185   match = Search(pattern, line)
   5186   if not match:
   5187     return False
   5188 
   5189   # Exclude lines with keywords that tend to look like casts
   5190   context = line[0:match.start(1) - 1]
   5191   if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context):
   5192     return False
   5193 
   5194   # Try expanding current context to see if we are one level of
   5195   # parentheses inside a macro.
   5196   if linenum > 0:
   5197     for i in xrange(linenum - 1, max(0, linenum - 5), -1):
   5198       context = clean_lines.elided[i] + context
   5199   if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context):
   5200     return False
   5201 
   5202   # operator++(int) and operator--(int)
   5203   if context.endswith(' operator++') or context.endswith(' operator--'):
   5204     return False
   5205 
   5206   # A single unnamed argument for a function tends to look like an old-style cast.
   5207   # If we see those, don't issue warnings for deprecated casts.
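          # Illustrative example (assumed): in "int Increment(int) override;"
          # the "(int)" is a single unnamed parameter, and the "override" that
          # follows it matches the remainder pattern below, so no cast warning
          # is issued.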
   5208   remainder = line[match.end(0):]
   5209   if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)',
   5210            remainder):
   5211     return False
   5212 
   5213   # At this point, all that should be left is actual casts.
   5214   error(filename, linenum, 'readability/casting', 4,
   5215         'Using C-style cast.  Use %s<%s>(...) instead' %
   5216         (cast_type, match.group(1)))
   5217 
   5218   return True
   5219 
   5220 
   5221 def ExpectingFunctionArgs(clean_lines, linenum):
   5222   """Checks whether function type arguments are expected.
   5223 
   5224   Args:
   5225     clean_lines: A CleansedLines instance containing the file.
   5226     linenum: The number of the line to check.
   5227 
   5228   Returns:
   5229     True if the line at 'linenum' is inside something that expects arguments
   5230     of function types.
   5231   """
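          # Illustrative contexts where a function type is expected (assumed):
          #   MOCK_METHOD1(OnEvent, void(int severity));
          #   std::function<
          #       bool(const string&)> callback;
          # In the second case the previous line ends with "std::function<",
          # which is what the last pattern below detects.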
   5232   line = clean_lines.elided[linenum]
   5233   return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
   5234           (linenum >= 2 and
   5235            (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
   5236                   clean_lines.elided[linenum - 1]) or
   5237             Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
   5238                   clean_lines.elided[linenum - 2]) or
   5239             Search(r'\bstd::m?function\s*\<\s*$',
   5240                    clean_lines.elided[linenum - 1]))))
   5241 
   5242 
   5243 _HEADERS_CONTAINING_TEMPLATES = (
   5244     ('<deque>', ('deque',)),
   5245     ('<functional>', ('unary_function', 'binary_function',
   5246                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   5247                       'negate',
   5248                       'equal_to', 'not_equal_to', 'greater', 'less',
   5249                       'greater_equal', 'less_equal',
   5250                       'logical_and', 'logical_or', 'logical_not',
   5251                       'unary_negate', 'not1', 'binary_negate', 'not2',
   5252                       'bind1st', 'bind2nd',
   5253                       'pointer_to_unary_function',
   5254                       'pointer_to_binary_function',
   5255                       'ptr_fun',
   5256                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   5257                       'mem_fun_ref_t',
   5258                       'const_mem_fun_t', 'const_mem_fun1_t',
   5259                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   5260                       'mem_fun_ref',
   5261                      )),
   5262     ('<limits>', ('numeric_limits',)),
   5263     ('<list>', ('list',)),
   5264     ('<map>', ('map', 'multimap',)),
   5265     ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
   5266                   'unique_ptr', 'weak_ptr')),
   5267     ('<queue>', ('queue', 'priority_queue',)),
   5268     ('<set>', ('set', 'multiset',)),
   5269     ('<stack>', ('stack',)),
   5270     ('<string>', ('char_traits', 'basic_string',)),
   5271     ('<tuple>', ('tuple',)),
   5272     ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
   5273     ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
   5274     ('<utility>', ('pair',)),
   5275     ('<vector>', ('vector',)),
   5276 
   5277     # gcc extensions.
   5278     # Note: std::hash is their hash, ::hash is our hash
   5279     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   5280     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   5281     ('<slist>', ('slist',)),
   5282     )
   5283 
   5284 _HEADERS_MAYBE_TEMPLATES = (
   5285     ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
   5286                      'transform',
   5287                     )),
   5288     ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
   5289     )
   5290 
   5291 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   5292 
   5293 _re_pattern_headers_maybe_templates = []
   5294 for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
   5295   for _template in _templates:
   5296     # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   5297     # type::max().
   5298     _re_pattern_headers_maybe_templates.append(
   5299         (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   5300             _template,
   5301             _header))
   5302 
   5303 # Other scripts may reach in and modify this pattern.
   5304 _re_pattern_templates = []
   5305 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   5306   for _template in _templates:
   5307     _re_pattern_templates.append(
   5308         (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   5309          _template + '<>',
   5310          _header))
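        # Illustrative example (assumed): the '<vector>' entry above produces a
        # pattern that matches "vector<" in a line such as "std::vector<int> v;",
        # recording 'vector<>' and the suggested header '<vector>'.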
   5311 
   5312 
   5313 def FilesBelongToSameModule(filename_cc, filename_h):
   5314   """Check if these two filenames belong to the same module.
   5315 
   5316   The concept of a 'module' here is as follows:
   5317   foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
   5318   same 'module' if they are in the same directory.
   5319   some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   5320   to belong to the same module here.
   5321 
   5322   If the filename_cc contains a longer path than the filename_h, for example,
   5323   '/absolute/path/to/base/sysinfo.cc', and this file would include
   5324   'base/sysinfo.h', this function also produces the prefix needed to open the
   5325   header. This is used by the caller of this function to more robustly open the
   5326   header file. We don't have access to the real include paths in this context,
   5327   so we need this guesswork here.
   5328 
   5329   Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
   5330   according to this implementation. Because of this, this function gives
   5331   some false positives. This should be sufficiently rare in practice.
   5332 
   5333   Args:
   5334     filename_cc: is the path for the .cc file
   5335     filename_h: is the path for the header file
   5336 
   5337   Returns:
   5338     Tuple with a bool and a string:
   5339     bool: True if filename_cc and filename_h belong to the same module.
   5340     string: the additional prefix needed to open the header file.
   5341   """
   5342 
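          # Illustrative example (assumed), following the docstring above:
          #   FilesBelongToSameModule('/path/to/base/sysinfo_test.cc',
          #                           'base/sysinfo.h')
          # returns (True, '/path/to/'), because after stripping the extension
          # and the test suffix, the .cc path ends with 'base/sysinfo'.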
   5343   fileinfo = FileInfo(filename_cc)
   5344   if not fileinfo.IsSource():
   5345     return (False, '')
   5346   filename_cc = filename_cc[:-len(fileinfo.Extension())]
   5347   matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo.BaseName())
   5348   if matched_test_suffix:
   5349     filename_cc = filename_cc[:-len(matched_test_suffix.group(1))]
   5350   filename_cc = filename_cc.replace('/public/', '/')
   5351   filename_cc = filename_cc.replace('/internal/', '/')
   5352 
   5353   if not filename_h.endswith('.h'):
   5354     return (False, '')
   5355   filename_h = filename_h[:-len('.h')]
   5356   if filename_h.endswith('-inl'):
   5357     filename_h = filename_h[:-len('-inl')]
   5358   filename_h = filename_h.replace('/public/', '/')
   5359   filename_h = filename_h.replace('/internal/', '/')
   5360 
   5361   files_belong_to_same_module = filename_cc.endswith(filename_h)
   5362   common_path = ''
   5363   if files_belong_to_same_module:
   5364     common_path = filename_cc[:-len(filename_h)]
   5365   return files_belong_to_same_module, common_path
   5366 
   5367 
   5368 def UpdateIncludeState(filename, include_dict, io=codecs):
   5369   """Fill up the include_dict with new includes found from the file.
   5370 
   5371   Args:
   5372     filename: the name of the header to read.
   5373     include_dict: a dictionary in which the headers are inserted.
   5374     io: The io factory to use to read the file. Provided for testability.
   5375 
   5376   Returns:
   5377     True if a header was successfully added. False otherwise.
   5378   """
   5379   headerfile = None
   5380   try:
   5381     headerfile = io.open(filename, 'r', 'utf8', 'replace')
   5382   except IOError:
   5383     return False
   5384   linenum = 0
   5385   for line in headerfile:
   5386     linenum += 1
   5387     clean_line = CleanseComments(line)
   5388     match = _RE_PATTERN_INCLUDE.search(clean_line)
   5389     if match:
   5390       include = match.group(2)
   5391       include_dict.setdefault(include, linenum)
   5392   return True
   5393 
   5394 
   5395 def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
   5396                               io=codecs):
   5397   """Reports missing STL includes.
   5398 
   5399   This function will output warnings to make sure you are including the headers
   5400   necessary for the STL containers and functions that you use. We only give one
   5401   reason to include a header. For example, if you use both equal_to<> and
   5402   less<> in a .h file, only one of these (whichever appears later in the
   5403   file) will be reported as a reason to include <functional>.
   5404 
   5405   Args:
   5406     filename: The name of the current file.
   5407     clean_lines: A CleansedLines instance containing the file.
   5408     include_state: An _IncludeState instance.
   5409     error: The function to call with any errors found.
   5410     io: The IO factory to use to read the header file. Provided for unittest
   5411         injection.
   5412   """
   5413   required = {}  # A map of header name to linenumber and the template entity.
   5414                  # Example of required: { '<functional>': (1219, 'less<>') }
   5415 
   5416   for linenum in xrange(clean_lines.NumLines()):
   5417     line = clean_lines.elided[linenum]
   5418     if not line or line[0] == '#':
   5419       continue
   5420 
   5421     # String is special -- it is a non-templatized type in STL.
   5422     matched = _RE_PATTERN_STRING.search(line)
   5423     if matched:
   5424       # Don't warn about strings in non-STL namespaces:
   5425       # (We check only the first match per line; good enough.)
   5426       prefix = line[:matched.start()]
   5427       if prefix.endswith('std::') or not prefix.endswith('::'):
   5428         required['<string>'] = (linenum, 'string')
   5429 
   5430     for pattern, template, header in _re_pattern_headers_maybe_templates:
   5431       if pattern.search(line):
   5432         required[header] = (linenum, template)
   5433 
   5434     # The following check is just a speedup; no semantics are changed.
   5435     if '<' not in line:  # Reduces the CPU time usage by skipping lines.
   5436       continue
   5437 
   5438     for pattern, template, header in _re_pattern_templates:
   5439       matched = pattern.search(line)
   5440       if matched:
   5441         # Don't warn about IWYU in non-STL namespaces:
   5442         # (We check only the first match per line; good enough.)
   5443         prefix = line[:matched.start()]
   5444         if prefix.endswith('std::') or not prefix.endswith('::'):
   5445           required[header] = (linenum, template)
   5446 
   5447   # The policy is that if you #include something in foo.h you don't need to
   5448   # include it again in foo.cc. Here, we will look at possible includes.
   5449   # Let's flatten the include_state include_list and copy it into a dictionary.
   5450   include_dict = dict([item for sublist in include_state.include_list
   5451                        for item in sublist])
   5452 
   5453   # Did we find the header for this file (if any) and successfully load it?
   5454   header_found = False
   5455 
   5456   # Use the absolute path so that matching works properly.
   5457   abs_filename = FileInfo(filename).FullName()
   5458 
   5459   # For Emacs's flymake.
   5460   # If cpplint is invoked from Emacs's flymake, a temporary file is generated
   5461   # by flymake and that file name might end with '_flymake.cc'. In that case,
   5462   # restore original file name here so that the corresponding header file can be
   5463   # found.
   5464   # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
   5465   # instead of 'foo_flymake.h'
   5466   abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
   5467 
   5468   # include_dict is modified during iteration, so we iterate over a copy of
   5469   # the keys.
   5470   header_keys = include_dict.keys()
   5471   for header in header_keys:
   5472     (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
   5473     fullpath = common_path + header
   5474     if same_module and UpdateIncludeState(fullpath, include_dict, io):
   5475       header_found = True
   5476 
   5477   # If we can't find the header file for a .cc, assume it's because we don't
   5478   # know where to look. In that case we'll give up as we're not sure they
   5479   # didn't include it in the .h file.
   5480   # TODO(unknown): Do a better job of finding .h files so we are confident that
   5481   # not having the .h file means there isn't one.
   5482   if filename.endswith('.cc') and not header_found:
   5483     return
   5484 
   5485   # All the lines have been processed, report the errors found.
   5486   for required_header_unstripped in required:
   5487     template = required[required_header_unstripped][1]
   5488     if required_header_unstripped.strip('<>"') not in include_dict:
   5489       error(filename, required[required_header_unstripped][0],
   5490             'build/include_what_you_use', 4,
   5491             'Add #include ' + required_header_unstripped + ' for ' + template)
   5492 
   5493 
   5494 _RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
   5495 
   5496 
   5497 def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
   5498   """Check that make_pair's template arguments are deduced.
   5499 
   5500   G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
   5501   specified explicitly, and such use isn't intended in any case.
   5502 
   5503   Args:
   5504     filename: The name of the current file.
   5505     clean_lines: A CleansedLines instance containing the file.
   5506     linenum: The number of the line to check.
   5507     error: The function to call with any errors found.
   5508   """
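          # Illustrative example (assumed):
          #   std::make_pair<int, int>(1, 2)   // flagged: explicit template args
          #   std::make_pair(1, 2)             // fine: arguments are deduced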
   5509   line = clean_lines.elided[linenum]
   5510   match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
   5511   if match:
   5512     error(filename, linenum, 'build/explicit_make_pair',
   5513           4,  # 4 = high confidence
   5514           'For C++11-compatibility, omit template arguments from make_pair'
   5515           ' OR use pair directly OR if appropriate, construct a pair directly')
   5516 
   5517 
   5518 def CheckRedundantVirtual(filename, clean_lines, linenum, error):
   5519   """Check if line contains a redundant "virtual" function-specifier.
   5520 
   5521   Args:
   5522     filename: The name of the current file.
   5523     clean_lines: A CleansedLines instance containing the file.
   5524     linenum: The number of the line to check.
   5525     error: The function to call with any errors found.
   5526   """
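          # Illustrative example of what this check flags (assumed):
          #   virtual void OnClose() override;   // "virtual" is redundant here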
   5527   # Look for "virtual" on current line.
   5528   line = clean_lines.elided[linenum]
   5529   virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line)
   5530   if not virtual: return
   5531 
   5532   # Ignore "virtual" keywords that are near access-specifiers.  These
   5533   # are only used in class base-specifier and do not apply to member
   5534   # functions.
   5535   if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or
   5536       Match(r'^\s+(public|protected|private)\b', virtual.group(3))):
   5537     return
   5538 
   5539   # Ignore the "virtual" keyword from virtual base classes.  Usually
   5540   # there is a colon on the same line in these cases (virtual base
   5541   # classes are rare in google3 because multiple inheritance is rare).
   5542   if Match(r'^.*[^:]:[^:].*$', line): return
   5543 
   5544   # Look for the next opening parenthesis.  This is the start of the
   5545   # parameter list (possibly on the next line shortly after virtual).
   5546   # TODO(unknown): doesn't work if there are virtual functions with
   5547   # decltype() or other things that use parentheses, but csearch suggests
   5548   # that this is rare.
   5549   end_col = -1
   5550   end_line = -1
   5551   start_col = len(virtual.group(2))
   5552   for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())):
   5553     line = clean_lines.elided[start_line][start_col:]
   5554     parameter_list = Match(r'^([^(]*)\(', line)
   5555     if parameter_list:
   5556       # Match parentheses to find the end of the parameter list
   5557       (_, end_line, end_col) = CloseExpression(
   5558           clean_lines, start_line, start_col + len(parameter_list.group(1)))
   5559       break
   5560     start_col = 0
   5561 
   5562   if end_col < 0:
   5563     return  # Couldn't find end of parameter list, give up
   5564 
   5565   # Look for "override" or "final" after the parameter list
   5566   # (possibly on the next few lines).
   5567   for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())):
   5568     line = clean_lines.elided[i][end_col:]
   5569     match = Search(r'\b(override|final)\b', line)
   5570     if match:
   5571       error(filename, linenum, 'readability/inheritance', 4,
   5572             ('"virtual" is redundant since function is '
   5573              'already declared as "%s"' % match.group(1)))
   5574 
   5575     # Set end_col to check whole lines after we are done with the
   5576     # first line.
   5577     end_col = 0
   5578     if Search(r'[^\w]\s*$', line):
   5579       break
   5580 
   5581 
   5582 def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
   5583   """Check if line contains a redundant "override" or "final" virt-specifier.
   5584 
   5585   Args:
   5586     filename: The name of the current file.
   5587     clean_lines: A CleansedLines instance containing the file.
   5588     linenum: The number of the line to check.
   5589     error: The function to call with any errors found.
   5590   """
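          # Illustrative example of what this check flags (assumed):
          #   void OnClose() final override;   // "override" is redundant once
          #                                    // the function is "final"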
   5591   # Look for closing parenthesis nearby.  We need one to confirm where
   5592   # the declarator ends and where the virt-specifier starts to avoid
   5593   # false positives.
   5594   line = clean_lines.elided[linenum]
   5595   declarator_end = line.rfind(')')
   5596   if declarator_end >= 0:
   5597     fragment = line[declarator_end:]
   5598   else:
   5599     if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0:
   5600       fragment = line
   5601     else:
   5602       return
   5603 
   5604   # Check that at most one of "override" or "final" is present, not both
   5605   if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment):
   5606     error(filename, linenum, 'readability/inheritance', 4,
   5607           ('"override" is redundant since function is '
   5608            'already declared as "final"'))
   5609 
   5610 
   5613 # Returns true if we are at a new block, and it is directly
   5614 # inside of a namespace.
   5615 def IsBlockInNameSpace(nesting_state, is_forward_declaration):
   5616   """Checks that the new block is directly in a namespace.
   5617 
   5618   Args:
   5619     nesting_state: The _NestingState object that contains info about our state.
   5620     is_forward_declaration: If the class is a forward declared class.
   5621   Returns:
   5622     Whether or not the new block is directly in a namespace.
   5623   """
   5624   if is_forward_declaration:
   5625     if len(nesting_state.stack) >= 1 and (
   5626         isinstance(nesting_state.stack[-1], _NamespaceInfo)):
   5627       return True
   5628     else:
   5629       return False
   5630 
   5631   return (len(nesting_state.stack) > 1 and
   5632           nesting_state.stack[-1].check_namespace_indentation and
   5633           isinstance(nesting_state.stack[-2], _NamespaceInfo))
   5634 
   5635 
   5636 def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item,
   5637                                     raw_lines_no_comments, linenum):
   5638   """This method determines if we should apply our namespace indentation check.
   5639 
   5640   Args:
   5641     nesting_state: The current nesting state.
   5642     is_namespace_indent_item: If we just put a new class on the stack, True.
   5643       If the top of the stack is not a class, or we did not recently
   5644       add the class, False.
   5645     raw_lines_no_comments: The lines without the comments.
   5646     linenum: The current line number we are processing.
   5647 
   5648   Returns:
   5649     True if we should apply our namespace indentation check. Currently, it
   5650     only works for classes and namespaces inside of a namespace.
   5651   """
   5652 
   5653   is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments,
   5654                                                      linenum)
   5655 
   5656   if not (is_namespace_indent_item or is_forward_declaration):
   5657     return False
   5658 
   5659   # If we are in a macro, we do not want to check the namespace indentation.
   5660   if IsMacroDefinition(raw_lines_no_comments, linenum):
   5661     return False
   5662 
   5663   return IsBlockInNameSpace(nesting_state, is_forward_declaration)
   5664 
   5665 
   5666 # Call this method if the line is directly inside of a namespace.
   5667 # If the line above is blank (excluding comments) or the start of
   5668 # an inner namespace, it cannot be indented.
   5669 def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum,
   5670                                     error):
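          # Illustrative example (assumed): directly inside "namespace foo {",
          # an indented "  class Bar;" is flagged; "class Bar;" at column 0 is not.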
   5671   line = raw_lines_no_comments[linenum]
   5672   if Match(r'^\s+', line):
   5673     error(filename, linenum, 'runtime/indentation_namespace', 4,
   5674           'Do not indent within a namespace')
   5675 
   5676 
   5677 def ProcessLine(filename, file_extension, clean_lines, line,
   5678                 include_state, function_state, nesting_state, error,
   5679                 extra_check_functions=[]):
   5680   """Processes a single line in the file.
   5681 
   5682   Args:
   5683     filename: Filename of the file that is being processed.
   5684     file_extension: The extension (dot not included) of the file.
   5685     clean_lines: An array of strings, each representing a line of the file,
   5686                  with comments stripped.
   5687     line: Number of line being processed.
   5688     include_state: An _IncludeState instance in which the headers are inserted.
   5689     function_state: A _FunctionState instance which counts function lines, etc.
   5690     nesting_state: A NestingState instance which maintains information about
   5691                    the current stack of nested blocks being parsed.
   5692     error: A callable to which errors are reported, which takes 4 arguments:
   5693            filename, line number, error level, and message
   5694     extra_check_functions: An array of additional check functions that will be
   5695                            run on each source line. Each function takes 4
   5696                            arguments: filename, clean_lines, line, error
   5697   """
   5698   raw_lines = clean_lines.raw_lines
   5699   ParseNolintSuppressions(filename, raw_lines[line], line, error)
   5700   nesting_state.Update(filename, clean_lines, line, error)
   5701   CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
   5702                                error)
   5703   if nesting_state.InAsmBlock(): return
   5704   CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
   5705   CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
   5706   CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
   5707   CheckLanguage(filename, clean_lines, line, file_extension, include_state,
   5708                 nesting_state, error)
   5709   CheckForNonConstReference(filename, clean_lines, line, nesting_state, error)
   5710   CheckForNonStandardConstructs(filename, clean_lines, line,
   5711                                 nesting_state, error)
   5712   CheckVlogArguments(filename, clean_lines, line, error)
   5713   CheckPosixThreading(filename, clean_lines, line, error)
   5714   CheckInvalidIncrement(filename, clean_lines, line, error)
   5715   CheckMakePairUsesDeduction(filename, clean_lines, line, error)
   5716   CheckRedundantVirtual(filename, clean_lines, line, error)
   5717   CheckRedundantOverrideOrFinal(filename, clean_lines, line, error)
   5718   for check_fn in extra_check_functions:
   5719     check_fn(filename, clean_lines, line, error)
   5720 
   5721 def FlagCxx11Features(filename, clean_lines, linenum, error):
   5722   """Flag those c++11 features that we only allow in certain places.
   5723 
   5724   Args:
   5725     filename: The name of the current file.
   5726     clean_lines: A CleansedLines instance containing the file.
   5727     linenum: The number of the line to check.
   5728     error: The function to call with any errors found.
   5729   """
   5730   line = clean_lines.elided[linenum]
   5731 
   5732   include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
   5733 
   5734   # Flag unapproved C++ TR1 headers.
   5735   if include and include.group(1).startswith('tr1/'):
   5736     error(filename, linenum, 'build/c++tr1', 5,
   5737           ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1))
   5738 
   5739   # Flag unapproved C++11 headers.
   5740   if include and include.group(1) in ('cfenv',
   5741                                       'condition_variable',
   5742                                       'fenv.h',
   5743                                       'future',
   5744                                       'mutex',
   5745                                       'thread',
   5746                                       'chrono',
   5747                                       'ratio',
   5748                                       'regex',
   5749                                       'system_error',
   5750                                      ):
   5751     error(filename, linenum, 'build/c++11', 5,
   5752           ('<%s> is an unapproved C++11 header.') % include.group(1))
   5753 
   5754   # The only place where we need to worry about C++11 keywords and library
   5755   # features in preprocessor directives is in macro definitions.
   5756   if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return
   5757 
   5758   # These are classes and free functions.  The classes are always
   5759   # mentioned as std::*, but we only catch the free functions if
   5760   # they're not found by ADL.  They're alphabetical by header.
   5761   for top_name in (
   5762       # type_traits
   5763       'alignment_of',
   5764       'aligned_union',
   5765       ):
   5766     if Search(r'\bstd::%s\b' % top_name, line):
   5767       error(filename, linenum, 'build/c++11', 5,
   5768             ('std::%s is an unapproved C++11 class or function.  Send c-style '
   5769              'an example of where it would make your code more readable, and '
   5770              'they may let you use it.') % top_name)
   5771 
   5772 
   5773 def FlagCxx14Features(filename, clean_lines, linenum, error):
   5774   """Flag those C++14 features that we restrict.
   5775 
   5776   Args:
   5777     filename: The name of the current file.
   5778     clean_lines: A CleansedLines instance containing the file.
   5779     linenum: The number of the line to check.
   5780     error: The function to call with any errors found.
   5781   """
   5782   line = clean_lines.elided[linenum]
   5783 
   5784   include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line)
   5785 
   5786   # Flag unapproved C++14 headers.
   5787   if include and include.group(1) in ('scoped_allocator', 'shared_mutex'):
   5788     error(filename, linenum, 'build/c++14', 5,
   5789           ('<%s> is an unapproved C++14 header.') % include.group(1))
   5790 
   5791 
   5792 def ProcessFileData(filename, file_extension, lines, error,
   5793                     extra_check_functions=[]):
   5794   """Performs lint checks and reports any errors to the given error function.
   5795 
   5796   Args:
   5797     filename: Filename of the file that is being processed.
   5798     file_extension: The extension (dot not included) of the file.
   5799     lines: An array of strings, each representing a line of the file, with the
   5800            last element being empty if the file is terminated with a newline.
   5801     error: A callable to which errors are reported, which takes 4 arguments:
   5802            filename, line number, error level, and message
   5803     extra_check_functions: An array of additional check functions that will be
   5804                            run on each source line. Each function takes 4
   5805                            arguments: filename, clean_lines, line, error
   5806   """
   5807   lines = (['// marker so line numbers and indices both start at 1'] + lines +
   5808            ['// marker so line numbers end in a known way'])
   5809 
   5810   include_state = _IncludeState()
   5811   function_state = _FunctionState()
   5812   nesting_state = NestingState()
   5813 
   5814   ResetNolintSuppressions()
   5815 
   5816   CheckForCopyright(filename, lines, error)
   5817   ProcessGlobalSuppresions(lines)
   5818   RemoveMultiLineComments(filename, lines, error)
   5819   clean_lines = CleansedLines(lines)
   5820 
   5821   if file_extension == 'h':
   5822     CheckForHeaderGuard(filename, clean_lines, error)
   5823 
   5824   for line in xrange(clean_lines.NumLines()):
   5825     ProcessLine(filename, file_extension, clean_lines, line,
   5826                 include_state, function_state, nesting_state, error,
   5827                 extra_check_functions)
   5828     FlagCxx11Features(filename, clean_lines, line, error)
   5829   nesting_state.CheckCompletedBlocks(filename, error)
   5830 
   5831   CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
   5832 
   5833   # Check that the .cc file has included its header if it exists.
   5834   if _IsSourceExtension(file_extension):
   5835     CheckHeaderFileIncluded(filename, include_state, error)
   5836 
   5837   # We check here rather than inside ProcessLine so that we see raw
   5838   # lines rather than "cleaned" lines.
   5839   CheckForBadCharacters(filename, lines, error)
   5840 
   5841   CheckForNewlineAtEOF(filename, lines, error)
   5842 
   5843 def ProcessConfigOverrides(filename):
   5844   """ Loads the configuration files and processes the config overrides.
   5845 
   5846   Args:
   5847     filename: The name of the file being processed by the linter.
   5848 
   5849   Returns:
   5850     False if the current |filename| should not be processed further.
   5851   """
   5852 
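          # Example CPPLINT.cfg contents (illustrative; only the options handled
          # below are shown):
          #   set noparent
          #   filter=-build/include_order,+build/include_alpha
          #   exclude_files=third_party
          #   linelength=100
          #   root=src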
   5853   abs_filename = os.path.abspath(filename)
   5854   cfg_filters = []
   5855   keep_looking = True
   5856   while keep_looking:
   5857     abs_path, base_name = os.path.split(abs_filename)
   5858     if not base_name:
   5859       break  # Reached the root directory.
   5860 
   5861     cfg_file = os.path.join(abs_path, "CPPLINT.cfg")
   5862     abs_filename = abs_path
   5863     if not os.path.isfile(cfg_file):
   5864       continue
   5865 
   5866     try:
   5867       with open(cfg_file) as file_handle:
   5868         for line in file_handle:
   5869           line, _, _ = line.partition('#')  # Remove comments.
   5870           if not line.strip():
   5871             continue
   5872 
   5873           name, _, val = line.partition('=')
   5874           name = name.strip()
   5875           val = val.strip()
   5876           if name == 'set noparent':
   5877             keep_looking = False
   5878           elif name == 'filter':
   5879             cfg_filters.append(val)
   5880           elif name == 'exclude_files':
   5881             # When matching exclude_files pattern, use the base_name of
   5882             # the current file name or the directory name we are processing.
   5883             # For example, if we are checking for lint errors in /foo/bar/baz.cc
   5884             # and we found the .cfg file at /foo/CPPLINT.cfg, then the config
   5885             # file's "exclude_files" filter is meant to be checked against "bar"
   5886             # and not "baz" nor "bar/baz.cc".
   5887             if base_name:
   5888               pattern = re.compile(val)
   5889               if pattern.match(base_name):
   5890                 sys.stderr.write('Ignoring "%s": file excluded by "%s". '
   5891                                  'File path component "%s" matches '
   5892                                  'pattern "%s"\n' %
   5893                                  (filename, cfg_file, base_name, val))
   5894                 return False
   5895           elif name == 'linelength':
   5896             global _line_length
   5897             try:
   5898               _line_length = int(val)
   5899             except ValueError:
   5900               sys.stderr.write('Line length must be numeric.\n')
   5901           elif name == 'root':
   5902             global _root
   5903             _root = val
   5904           else:
   5905             sys.stderr.write(
   5906                 'Invalid configuration option (%s) in file %s\n' %
   5907                 (name, cfg_file))
   5908 
   5909     except IOError:
   5910       sys.stderr.write(
   5911           "Skipping config file '%s': Can't open for reading\n" % cfg_file)
   5912       keep_looking = False
   5913 
   5914   # Apply all the accumulated filters in reverse order (top-level directory
   5915   # config options having the least priority).
   5916   for cfg_filter in reversed(cfg_filters):
   5917     _AddFilters(cfg_filter)
   5918 
   5919   return True
   5920 
   5921 
   5922 def ProcessFile(filename, vlevel, extra_check_functions=[]):
   5923   """Does google-lint on a single file.
   5924 
   5925   Args:
   5926     filename: The name of the file to parse.
   5927 
   5928     vlevel: The level of errors to report.  Every error of confidence
   5929     >= verbose_level will be reported.  0 is a good default.
   5930 
   5931     extra_check_functions: An array of additional check functions that will be
   5932                            run on each source line. Each function takes 4
   5933                            arguments: filename, clean_lines, line, error
   5934   """
   5935 
   5936   _SetVerboseLevel(vlevel)
   5937   _BackupFilters()
   5938 
   5939   if not ProcessConfigOverrides(filename):
   5940     _RestoreFilters()
   5941     return
   5942 
   5943   lf_lines = []
   5944   crlf_lines = []
   5945   try:
   5946     # Support the UNIX convention of using "-" for stdin.  Note that
   5947     # we are not opening the file with universal newline support
   5948     # (which codecs doesn't support anyway), so the resulting lines do
   5949     # contain trailing '\r' characters if we are reading a file that
   5950     # has CRLF endings.
   5951     # If after the split a trailing '\r' is present, it is removed
   5952     # below.
   5953     if filename == '-':
   5954       lines = codecs.StreamReaderWriter(sys.stdin,
   5955                                         codecs.getreader('utf8'),
   5956                                         codecs.getwriter('utf8'),
   5957                                         'replace').read().split('\n')
   5958     else:
   5959       lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
   5960 
   5961     # Remove trailing '\r'.
   5962     # The -1 accounts for the extra trailing blank line we get from split()
   5963     for linenum in range(len(lines) - 1):
   5964       if lines[linenum].endswith('\r'):
   5965         lines[linenum] = lines[linenum].rstrip('\r')
   5966         crlf_lines.append(linenum + 1)
   5967       else:
   5968         lf_lines.append(linenum + 1)
   5969 
   5970   except IOError:
   5971     sys.stderr.write(
   5972         "Skipping input '%s': Can't open for reading\n" % filename)
   5973     _RestoreFilters()
   5974     return
   5975 
   5976   # Note, if no dot is found, this will give the entire filename as the ext.
   5977   file_extension = filename[filename.rfind('.') + 1:]
   5978 
   5979   # When reading from stdin, the extension is unknown, so no cpplint tests
   5980   # should rely on the extension.
   5981   if filename != '-' and file_extension not in _valid_extensions:
   5982     sys.stderr.write('Ignoring %s; not a valid file name '
   5983                      '(%s)\n' % (filename, ', '.join(_valid_extensions)))
   5984   else:
   5985     ProcessFileData(filename, file_extension, lines, Error,
   5986                     extra_check_functions)
   5987 
   5988     # If end-of-line sequences are a mix of LF and CR-LF, issue
   5989     # warnings on the lines with CR.
   5990     #
   5991     # Don't issue any warnings if all lines are uniformly LF or CR-LF,
   5992     # since critique can handle these just fine, and the style guide
   5993     # doesn't dictate a particular end of line sequence.
   5994     #
   5995     # We can't depend on os.linesep to determine what the desired
   5996     # end-of-line sequence should be, since that will return the
   5997     # server-side end-of-line sequence.
   5998     if lf_lines and crlf_lines:
   5999       # Warn on every line with CR.  An alternative approach might be to
   6000       # check whether the file is mostly CRLF or just LF, and warn on the
   6001       # minority; we bias toward LF here since most tools prefer LF.
   6002       for linenum in crlf_lines:
   6003         Error(filename, linenum, 'whitespace/newline', 1,
   6004               'Unexpected \\r (^M) found; better to use only \\n')
   6005 
   6006   sys.stderr.write('Done processing %s\n' % filename)
   6007   _RestoreFilters()
   6008 
   6009 
   6010 def PrintUsage(message):
   6011   """Prints a brief usage string and exits, optionally with an error message.
   6012 
   6013   Args:
   6014     message: The optional error message.
   6015   """
   6016   sys.stderr.write(_USAGE)
   6017   if message:
   6018     sys.exit('\nFATAL ERROR: ' + message)
   6019   else:
   6020     sys.exit(1)
   6021 
   6022 
   6023 def PrintCategories():
   6024   """Prints a list of all the error-categories used by error messages.
   6025 
   6026   These are the categories used to filter messages via --filter.
   6027   """
   6028   sys.stderr.write(''.join('  %s\n' % cat for cat in _ERROR_CATEGORIES))
   6029   sys.exit(0)
   6030 
   6031 
   6032 def ParseArguments(args):
   6033   """Parses the command line arguments.
   6034 
   6035   This may set the output format and verbosity level as side-effects.
   6036 
   6037   Args:
   6038     args: The command line arguments.
   6039 
   6040   Returns:
   6041     The list of filenames to lint.
   6042   """
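          # Example invocation (illustrative; category names are only examples):
          #   cpplint.py --output=vs7 --filter=-whitespace,+whitespace/braces \
          #       --linelength=100 foo.cc bar.h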
   6043   try:
   6044     (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
   6045                                                  'counting=',
   6046                                                  'filter=',
   6047                                                  'root=',
   6048                                                  'linelength=',
   6049                                                  'extensions='])
   6050   except getopt.GetoptError:
   6051     PrintUsage('Invalid arguments.')
   6052 
   6053   verbosity = _VerboseLevel()
   6054   output_format = _OutputFormat()
   6055   filters = ''
   6056   counting_style = ''
   6057 
   6058   for (opt, val) in opts:
   6059     if opt == '--help':
   6060       PrintUsage(None)
   6061     elif opt == '--output':
   6062       if val not in ('emacs', 'vs7', 'eclipse'):
   6063         PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
   6064       output_format = val
   6065     elif opt == '--verbose':
   6066       verbosity = int(val)
   6067     elif opt == '--filter':
   6068       filters = val
   6069       if not filters:
   6070         PrintCategories()
   6071     elif opt == '--counting':
   6072       if val not in ('total', 'toplevel', 'detailed'):
   6073         PrintUsage('Valid counting options are total, toplevel, and detailed')
   6074       counting_style = val
   6075     elif opt == '--root':
   6076       global _root
   6077       _root = val
   6078     elif opt == '--linelength':
   6079       global _line_length
   6080       try:
   6081         _line_length = int(val)
   6082       except ValueError:
   6083         PrintUsage('Line length must be digits.')
   6084     elif opt == '--extensions':
   6085       global _valid_extensions
   6086       try:
   6087         _valid_extensions = set(val.split(','))
   6088       except ValueError:
   6089         PrintUsage('Extensions must be a comma-separated list.')
   6090 
   6091   if not filenames:
   6092     PrintUsage('No files were specified.')
   6093 
   6094   _SetOutputFormat(output_format)
   6095   _SetVerboseLevel(verbosity)
   6096   _SetFilters(filters)
   6097   _SetCountingStyle(counting_style)
   6098 
   6099   return filenames
   6100 
   6101 
   6102 def main():
   6103   filenames = ParseArguments(sys.argv[1:])
   6104 
   6105   # Change stderr to write with replacement characters so we don't die
   6106   # if we try to print something containing non-ASCII characters.
   6107   sys.stderr = codecs.StreamReaderWriter(sys.stderr,
   6108                                          codecs.getreader('utf8'),
   6109                                          codecs.getwriter('utf8'),
   6110                                          'replace')
   6111 
   6112   _cpplint_state.ResetErrorCounts()
   6113   for filename in filenames:
   6114     ProcessFile(filename, _cpplint_state.verbose_level)
   6115   _cpplint_state.PrintErrorCounts()
   6116 
   6117   sys.exit(_cpplint_state.error_count > 0)
   6118 
   6119 
   6120 if __name__ == '__main__':
   6121   main()
   6122