Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (c) 2009 Google Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #    * Redistributions of source code must retain the above copyright
     10 # notice, this list of conditions and the following disclaimer.
     11 #    * Redistributions in binary form must reproduce the above
     12 # copyright notice, this list of conditions and the following disclaimer
     13 # in the documentation and/or other materials provided with the
     14 # distribution.
     15 #    * Neither the name of Google Inc. nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 # Here are some issues that I've had people identify in my code during reviews,
     32 # that I think are possible to flag automatically in a lint tool.  If these were
     33 # caught by lint, it would save time both for myself and that of my reviewers.
     34 # Most likely, some of these are beyond the scope of the current lint framework,
     35 # but I think it is valuable to retain these wish-list items even if they cannot
     36 # be immediately implemented.
     37 #
     38 #  Suggestions
     39 #  -----------
     40 #  - Check for no 'explicit' for multi-arg ctor
     41 #  - Check for boolean assign RHS in parens
     42 #  - Check for ctor initializer-list colon position and spacing
     43 #  - Check that if there's a ctor, there should be a dtor
     44 #  - Check accessors that return non-pointer member variables are
     45 #    declared const
     46 #  - Check accessors that return non-const pointer member vars are
     47 #    *not* declared const
     48 #  - Check for using public includes for testing
     49 #  - Check for spaces between brackets in one-line inline method
     50 #  - Check for no assert()
     51 #  - Check for spaces surrounding operators
     52 #  - Check for 0 in pointer context (should be NULL)
     53 #  - Check for 0 in char context (should be '\0')
     54 #  - Check for camel-case method name conventions for methods
     55 #    that are not simple inline getters and setters
     56 #  - Do not indent namespace contents
     57 #  - Avoid inlining non-trivial constructors in header files
     58 #  - Check for old-school (void) cast for call-sites of functions
     59 #    ignored return value
     60 #  - Check gUnit usage of anonymous namespace
     61 #  - Check for class declaration order (typedefs, consts, enums,
     62 #    ctor(s?), dtor, friend declarations, methods, member vars)
     63 #
     64 
     65 """Does google-lint on c++ files.
     66 
     67 The goal of this script is to identify places in the code that *may*
     68 be in non-compliance with google style.  It does not attempt to fix
     69 up these problems -- the point is to educate.  It also does not
     70 attempt to find all problems, or to ensure that everything it does
     71 find is legitimately a problem.
     72 
     73 In particular, we can get very confused by /* and // inside strings!
     74 We do a small hack, which is to ignore //'s with "'s after them on the
     75 same line, but it is far from perfect (in either direction).
     76 """
     77 
     78 import codecs
     79 import copy
     80 import getopt
     81 import math  # for log
     82 import os
     83 import re
     84 import sre_compile
     85 import string
     86 import sys
     87 import unicodedata
     88 
     89 
     90 _USAGE = """
     91 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
     92                    [--counting=total|toplevel|detailed]
     93                    [--quiet]
     94         <file> [file] ...
     95 
     96   The style guidelines this tries to follow are those in
     97     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
     98 
     99   Every problem is given a confidence score from 1-5, with 5 meaning we are
    100   certain of the problem, and 1 meaning it could be a legitimate construct.
    101   This will miss some errors, and is not a substitute for a code review.
    102 
    103   To suppress false-positive errors of a certain category, add a
    104   'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
    105   suppresses errors of all categories on that line.
    106 
    107   The files passed in will be linted; at least one file must be provided.
    108   Linted extensions are .cc, .cpp, and .h.  Other file types will be ignored.
    109 
    110   Flags:
    111 
    112     output=vs7
    113       By default, the output is formatted to ease emacs parsing.  Visual Studio
    114       compatible output (vs7) may also be used.  Other formats are unsupported.
    115 
    116     verbose=#
    117       Specify a number 0-5 to restrict errors to certain verbosity levels.
    118 
    119     quiet
    120       Don't print anything if no errors are found.
    121 
    122     filter=-x,+y,...
    123       Specify a comma-separated list of category-filters to apply: only
    124       error messages whose category names pass the filters will be printed.
    125       (Category names are printed with the message and look like
    126       "[whitespace/indent]".)  Filters are evaluated left to right.
    127       "-FOO" and "FOO" means "do not print categories that start with FOO".
    128       "+FOO" means "do print categories that start with FOO".
    129 
    130       Examples: --filter=-whitespace,+whitespace/braces
    131                 --filter=whitespace,runtime/printf,+runtime/printf_format
    132                 --filter=-,+build/include_what_you_use
    133 
    134       To see a list of all the categories used in cpplint, pass no arg:
    135          --filter=
    136 
    137     counting=total|toplevel|detailed
    138       The total number of errors found is always printed. If
    139       'toplevel' is provided, then the count of errors in each of
    140       the top-level categories like 'build' and 'whitespace' will
    141       also be printed. If 'detailed' is provided, then a count
    142       is provided for each category like 'build/class'.
    143 
    144     root=subdir
    145       The root directory used for deriving header guard CPP variable.
    146       By default, the header guard CPP variable is calculated as the relative
    147       path to the directory that contains .git, .hg, or .svn.  When this flag
    148       is specified, the relative path is calculated from the specified
    149       directory. If the specified directory does not exist, this flag is
    150       ignored.
    151 
    152       Examples:
    153         Assuing that src/.git exists, the header guard CPP variables for
    154         src/chrome/browser/ui/browser.h are:
    155 
    156         No flag => CHROME_BROWSER_UI_BROWSER_H_
    157         --root=chrome => BROWSER_UI_BROWSER_H_
    158         --root=chrome/browser => UI_BROWSER_H_
    159 """
    160 
    161 # We categorize each error message we print.  Here are the categories.
    162 # We want an explicit list so we can list them all in cpplint --filter=.
    163 # If you add a new error message with a new category, add it to the list
    164 # here!  cpplint_unittest.py should tell you if you forget to do this.
    165 # \ used for clearer layout -- pylint: disable-msg=C6013
    166 _ERROR_CATEGORIES = [
    167   'build/class',
    168   'build/deprecated',
    169   'build/endif_comment',
    170   'build/explicit_make_pair',
    171   'build/forward_decl',
    172   'build/header_guard',
    173   'build/include',
    174   'build/include_alpha',
    175   'build/include_order',
    176   'build/include_what_you_use',
    177   'build/namespaces',
    178   'build/printf_format',
    179   'build/storage_class',
    180   'legal/copyright',
    181   'readability/alt_tokens',
    182   'readability/braces',
    183   'readability/casting',
    184   'readability/check',
    185   'readability/constructors',
    186   'readability/fn_size',
    187   'readability/function',
    188   'readability/multiline_comment',
    189   'readability/multiline_string',
    190   'readability/namespace',
    191   'readability/nolint',
    192   'readability/streams',
    193   'readability/todo',
    194   'readability/utf8',
    195   'runtime/arrays',
    196   'runtime/casting',
    197   'runtime/explicit',
    198   'runtime/int',
    199   'runtime/init',
    200   'runtime/invalid_increment',
    201   'runtime/member_string_references',
    202   'runtime/memset',
    203   'runtime/operator',
    204   'runtime/printf',
    205   'runtime/printf_format',
    206   'runtime/references',
    207   'runtime/rtti',
    208   'runtime/sizeof',
    209   'runtime/string',
    210   'runtime/threadsafe_fn',
    211   'whitespace/blank_line',
    212   'whitespace/braces',
    213   'whitespace/comma',
    214   'whitespace/comments',
    215   'whitespace/empty_loop_body',
    216   'whitespace/end_of_line',
    217   'whitespace/ending_newline',
    218   'whitespace/forcolon',
    219   'whitespace/indent',
    220   'whitespace/labels',
    221   'whitespace/line_length',
    222   'whitespace/newline',
    223   'whitespace/operators',
    224   'whitespace/parens',
    225   'whitespace/semicolon',
    226   'whitespace/tab',
    227   'whitespace/todo'
    228   ]
    229 
# The default state of the category filter. This is overridden by the --filter=
# flag. By default all errors are on, so only add here categories that should be
# off by default (i.e., categories that must be enabled by the --filter= flags).
# All entries here should start with a '-' or '+', as in the --filter= flag.
_DEFAULT_FILTERS = ['-build/include_alpha']
    235 
    236 # We used to check for high-bit characters, but after much discussion we
    237 # decided those were OK, as long as they were in UTF-8 and didn't represent
    238 # hard-coded international strings, which belong in a separate i18n file.
    239 
    240 # Headers that we consider STL headers.
    241 _STL_HEADERS = frozenset([
    242     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    243     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    244     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
    245     'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    246     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    247     'utility', 'vector', 'vector.h',
    248     ])
    249 
    250 
    251 # Non-STL C++ system headers.
    252 _CPP_HEADERS = frozenset([
    253     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    254     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    255     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    256     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    257     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    258     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    259     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
    260     'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    261     'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
    262     'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
    263     'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    264     'stdiostream.h', 'streambuf', 'streambuf.h', 'stream.h', 'strfile.h',
    265     'string', 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo',
    266     'valarray',
    267     ])
    268 
    269 
    270 # Assertion macros.  These are defined in base/logging.h and
    271 # testing/base/gunit.h.  Note that the _M versions need to come first
    272 # for substring matching to work.
    273 _CHECK_MACROS = [
    274     'DCHECK', 'CHECK',
    275     'EXPECT_TRUE_M', 'EXPECT_TRUE',
    276     'ASSERT_TRUE_M', 'ASSERT_TRUE',
    277     'EXPECT_FALSE_M', 'EXPECT_FALSE',
    278     'ASSERT_FALSE_M', 'ASSERT_FALSE',
    279     ]
    280 
    281 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
    282 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
    283 
    284 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
    285                         ('>=', 'GE'), ('>', 'GT'),
    286                         ('<=', 'LE'), ('<', 'LT')]:
    287   _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    288   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    289   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    290   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    291   _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    292   _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    293 
    294 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    295                             ('>=', 'LT'), ('>', 'LE'),
    296                             ('<=', 'GT'), ('<', 'GE')]:
    297   _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    298   _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    299   _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    300   _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    301 
    302 # Alternative tokens and their replacements.  For full list, see section 2.5
    303 # Alternative tokens [lex.digraph] in the C++ standard.
    304 #
    305 # Digraphs (such as '%:') are not included here since it's a mess to
    306 # match those on a word boundary.
    307 _ALT_TOKEN_REPLACEMENT = {
    308     'and': '&&',
    309     'bitor': '|',
    310     'or': '||',
    311     'xor': '^',
    312     'compl': '~',
    313     'bitand': '&',
    314     'and_eq': '&=',
    315     'or_eq': '|=',
    316     'xor_eq': '^=',
    317     'not': '!',
    318     'not_eq': '!='
    319     }
    320 
    321 # Compile regular expression that matches all the above keywords.  The "[ =()]"
    322 # bit is meant to avoid matching these keywords outside of boolean expressions.
    323 #
    324 # False positives include C-style multi-line comments (http://go/nsiut )
    325 # and multi-line strings (http://go/beujw ), but those have always been
    326 # troublesome for cpplint.
    327 _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    328     r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
    329 
    330 
# These constants define types of headers for use with
# _IncludeState.CheckNextIncludeOrder().
_C_SYS_HEADER = 1
_CPP_SYS_HEADER = 2
_LIKELY_MY_HEADER = 3
_POSSIBLE_MY_HEADER = 4
_OTHER_HEADER = 5

# These constants define the current inline assembly state
_NO_ASM = 0       # Outside of inline assembly block
_INSIDE_ASM = 1   # Inside inline assembly block
_END_ASM = 2      # Last line of inline assembly block
_BLOCK_ASM = 3    # The whole block is an inline assembly block

# Match start of assembly blocks: optional leading whitespace, one of the
# keywords asm/_asm/__asm/__asm__, an optional volatile/__volatile__
# qualifier (captured as group 1), then an opening '{' or '('.
_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
                        r'(?:\s+(volatile|__volatile__))?'
                        r'\s*[{(]')
    349 
    350 
# Cache of compiled regular expressions, keyed by pattern string.  Match() and
# Search() add an entry the first time they see a pattern and reuse it after.
_regexp_compile_cache = {}

# Finds occurrences of NOLINT or NOLINT(...).  Group 1 is the parenthesized
# part including the parentheses, or None when there is none.
_RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')

# {str, set(int)}: a map from error categories to sets of linenumbers
# on which those errors are expected and should be suppressed.
# The key None holds the lines on which all categories are suppressed.
_error_suppressions = {}

# The root directory used for deriving header guard CPP variable.
# This is set by --root flag.
_root = None
    363 
    364 def ParseNolintSuppressions(filename, raw_line, linenum, error):
    365   """Updates the global list of error-suppressions.
    366 
    367   Parses any NOLINT comments on the current line, updating the global
    368   error_suppressions store.  Reports an error if the NOLINT comment
    369   was malformed.
    370 
    371   Args:
    372     filename: str, the name of the input file.
    373     raw_line: str, the line of input text, with comments.
    374     linenum: int, the number of the current line.
    375     error: function, an error handler.
    376   """
    377   # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
    378   matched = _RE_SUPPRESSION.search(raw_line)
    379   if matched:
    380     category = matched.group(1)
    381     if category in (None, '(*)'):  # => "suppress all"
    382       _error_suppressions.setdefault(None, set()).add(linenum)
    383     else:
    384       if category.startswith('(') and category.endswith(')'):
    385         category = category[1:-1]
    386         if category in _ERROR_CATEGORIES:
    387           _error_suppressions.setdefault(category, set()).add(linenum)
    388         else:
    389           error(filename, linenum, 'readability/nolint', 5,
    390                 'Unknown NOLINT error category: %s' % category)
    391 
    392 
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the global _error_suppressions map in place.
  """
  _error_suppressions.clear()
    396 
    397 
    398 def IsErrorSuppressedByNolint(category, linenum):
    399   """Returns true if the specified error category is suppressed on this line.
    400 
    401   Consults the global error_suppressions map populated by
    402   ParseNolintSuppressions/ResetNolintSuppressions.
    403 
    404   Args:
    405     category: str, the category of the error.
    406     linenum: int, the current line number.
    407   Returns:
    408     bool, True iff the error should be suppressed due to a NOLINT comment.
    409   """
    410   return (linenum in _error_suppressions.get(category, set()) or
    411           linenum in _error_suppressions.get(None, set()))
    412 
    413 def Match(pattern, s):
    414   """Matches the string with the pattern, caching the compiled regexp."""
    415   # The regexp compilation caching is inlined in both Match and Search for
    416   # performance reasons; factoring it out into a separate function turns out
    417   # to be noticeably expensive.
    418   if not pattern in _regexp_compile_cache:
    419     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    420   return _regexp_compile_cache[pattern].match(s)
    421 
    422 
    423 def Search(pattern, s):
    424   """Searches the string for the pattern, caching the compiled regexp."""
    425   if not pattern in _regexp_compile_cache:
    426     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    427   return _regexp_compile_cache[pattern].search(s)
    428 
    429 
    430 class _IncludeState(dict):
    431   """Tracks line numbers for includes, and the order in which includes appear.
    432 
    433   As a dict, an _IncludeState object serves as a mapping between include
    434   filename and line number on which that file was included.
    435 
    436   Call CheckNextIncludeOrder() once for each header in the file, passing
    437   in the type constants defined above. Calls in an illegal order will
    438   raise an _IncludeError with an appropriate error message.
    439 
    440   """
    441   # self._section will move monotonically through this set. If it ever
    442   # needs to move backwards, CheckNextIncludeOrder will raise an error.
    443   _INITIAL_SECTION = 0
    444   _MY_H_SECTION = 1
    445   _C_SECTION = 2
    446   _CPP_SECTION = 3
    447   _OTHER_H_SECTION = 4
    448 
    449   _TYPE_NAMES = {
    450       _C_SYS_HEADER: 'C system header',
    451       _CPP_SYS_HEADER: 'C++ system header',
    452       _LIKELY_MY_HEADER: 'header this file implements',
    453       _POSSIBLE_MY_HEADER: 'header this file may implement',
    454       _OTHER_HEADER: 'other header',
    455       }
    456   _SECTION_NAMES = {
    457       _INITIAL_SECTION: "... nothing. (This can't be an error.)",
    458       _MY_H_SECTION: 'a header this file implements',
    459       _C_SECTION: 'C system header',
    460       _CPP_SECTION: 'C++ system header',
    461       _OTHER_H_SECTION: 'other header',
    462       }
    463 
    464   def __init__(self):
    465     dict.__init__(self)
    466     # The name of the current section.
    467     self._section = self._INITIAL_SECTION
    468     # The path of last found header.
    469     self._last_header = ''
    470 
    471   def CanonicalizeAlphabeticalOrder(self, header_path):
    472     """Returns a path canonicalized for alphabetical comparison.
    473 
    474     - replaces "-" with "_" so they both cmp the same.
    475     - removes '-inl' since we don't require them to be after the main header.
    476     - lowercase everything, just in case.
    477 
    478     Args:
    479       header_path: Path to be canonicalized.
    480 
    481     Returns:
    482       Canonicalized path.
    483     """
    484     return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
    485 
    486   def IsInAlphabeticalOrder(self, header_path):
    487     """Check if a header is in alphabetical order with the previous header.
    488 
    489     Args:
    490       header_path: Header to be checked.
    491 
    492     Returns:
    493       Returns true if the header is in alphabetical order.
    494     """
    495     canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
    496     if self._last_header > canonical_header:
    497       return False
    498     self._last_header = canonical_header
    499     return True
    500 
    501   def CheckNextIncludeOrder(self, header_type):
    502     """Returns a non-empty error message if the next header is out of order.
    503 
    504     This function also updates the internal state to be ready to check
    505     the next include.
    506 
    507     Args:
    508       header_type: One of the _XXX_HEADER constants defined above.
    509 
    510     Returns:
    511       The empty string if the header is in the right order, or an
    512       error message describing what's wrong.
    513 
    514     """
    515     error_message = ('Found %s after %s' %
    516                      (self._TYPE_NAMES[header_type],
    517                       self._SECTION_NAMES[self._section]))
    518 
    519     last_section = self._section
    520 
    521     if header_type == _C_SYS_HEADER:
    522       if self._section <= self._C_SECTION:
    523         self._section = self._C_SECTION
    524       else:
    525         self._last_header = ''
    526         return error_message
    527     elif header_type == _CPP_SYS_HEADER:
    528       if self._section <= self._CPP_SECTION:
    529         self._section = self._CPP_SECTION
    530       else:
    531         self._last_header = ''
    532         return error_message
    533     elif header_type == _LIKELY_MY_HEADER:
    534       if self._section <= self._MY_H_SECTION:
    535         self._section = self._MY_H_SECTION
    536       else:
    537         self._section = self._OTHER_H_SECTION
    538     elif header_type == _POSSIBLE_MY_HEADER:
    539       if self._section <= self._MY_H_SECTION:
    540         self._section = self._MY_H_SECTION
    541       else:
    542         # This will always be the fallback because we're not sure
    543         # enough that the header is associated with this file.
    544         self._section = self._OTHER_H_SECTION
    545     else:
    546       assert header_type == _OTHER_HEADER
    547       self._section = self._OTHER_H_SECTION
    548 
    549     if last_section != self._section:
    550       self._last_header = ''
    551 
    552     return ''
    553 
    554 
    555 class _CppLintState(object):
    556   """Maintains module-wide state.."""
    557 
    558   def __init__(self):
    559     self.verbose_level = 1  # global setting.
    560     self.error_count = 0    # global count of reported errors
    561     # filters to apply when emitting error messages
    562     self.filters = _DEFAULT_FILTERS[:]
    563     self.counting = 'total'  # In what way are we counting errors?
    564     self.errors_by_category = {}  # string to int dict storing error counts
    565     # BEGIN android-added
    566     self.quiet = False      # global setting.
    567     # END android-added
    568 
    569     # output format:
    570     # "emacs" - format that emacs can parse (default)
    571     # "vs7" - format that Microsoft Visual Studio 7 can parse
    572     self.output_format = 'emacs'
    573 
    574   def SetOutputFormat(self, output_format):
    575     """Sets the output format for errors."""
    576     self.output_format = output_format
    577 
    578   # BEGIN android-added
    579   def SetQuiet(self, level):
    580     """Sets the module's quiet setting, and returns the previous setting."""
    581     last_quiet = self.quiet
    582     self.quiet = level
    583     return last_quiet
    584   # END android-added
    585 
    586   def SetVerboseLevel(self, level):
    587     """Sets the module's verbosity, and returns the previous setting."""
    588     last_verbose_level = self.verbose_level
    589     self.verbose_level = level
    590     return last_verbose_level
    591 
    592   def SetCountingStyle(self, counting_style):
    593     """Sets the module's counting options."""
    594     self.counting = counting_style
    595 
    596   def SetFilters(self, filters):
    597     """Sets the error-message filters.
    598 
    599     These filters are applied when deciding whether to emit a given
    600     error message.
    601 
    602     Args:
    603       filters: A string of comma-separated filters (eg "+whitespace/indent").
    604                Each filter should start with + or -; else we die.
    605 
    606     Raises:
    607       ValueError: The comma-separated filters did not all start with '+' or '-'.
    608                   E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    609     """
    610     # Default filters always have less priority than the flag ones.
    611     self.filters = _DEFAULT_FILTERS[:]
    612     for filt in filters.split(','):
    613       clean_filt = filt.strip()
    614       if clean_filt:
    615         self.filters.append(clean_filt)
    616     for filt in self.filters:
    617       if not (filt.startswith('+') or filt.startswith('-')):
    618         raise ValueError('Every filter in --filters must start with + or -'
    619                          ' (%s does not)' % filt)
    620 
    621   def ResetErrorCounts(self):
    622     """Sets the module's error statistic back to zero."""
    623     self.error_count = 0
    624     self.errors_by_category = {}
    625 
    626   def IncrementErrorCount(self, category):
    627     """Bumps the module's error statistic."""
    628     self.error_count += 1
    629     if self.counting in ('toplevel', 'detailed'):
    630       if self.counting != 'detailed':
    631         category = category.split('/')[0]
    632       if category not in self.errors_by_category:
    633         self.errors_by_category[category] = 0
    634       self.errors_by_category[category] += 1
    635 
    636   def PrintErrorCounts(self):
    637     """Print a summary of errors by category, and the total."""
    638     for category, count in self.errors_by_category.iteritems():
    639       sys.stderr.write('Category \'%s\' errors found: %d\n' %
    640                        (category, count))
    641     sys.stderr.write('Total errors found: %d\n' % self.error_count)
    642 
# The single module-wide state object that the module-level accessor
# functions (_OutputFormat, _SetFilters, ...) read and update.
_cpplint_state = _CppLintState()
    644 
    645 
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the module-wide _cpplint_state object.
  """
  return _cpplint_state.output_format
    649 
    650 
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The new format; forwarded unchanged to
      _cpplint_state.SetOutputFormat().
  """
  _cpplint_state.SetOutputFormat(output_format)
    654 
    655 
    656 # BEGIN android-added
def _Quiet():
  """Returns the module's quiet setting.

  Returns:
    The quiet attribute of the module-wide _cpplint_state object.
  """
  return _cpplint_state.quiet
    660 
    661 
def _SetQuiet(level):
  """Sets the module's quiet status, and returns the previous setting.

  Args:
    level: The new quiet setting; forwarded unchanged to
      _cpplint_state.SetQuiet().

  Returns:
    The quiet setting that was in effect before this call.
  """
  return _cpplint_state.SetQuiet(level)
    665 # END android-added
    666 
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the module-wide _cpplint_state object.
  """
  return _cpplint_state.verbose_level
    670 
    671 
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity level; forwarded unchanged to
      _cpplint_state.SetVerboseLevel().

  Returns:
    The verbosity level that was in effect before this call.
  """
  return _cpplint_state.SetVerboseLevel(level)
    675 
    676 
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style; forwarded unchanged to
      _cpplint_state.SetCountingStyle().
  """
  _cpplint_state.SetCountingStyle(level)
    680 
    681 
def _Filters():
  """Returns the module's list of output filters, as a list.

  The list object held by _cpplint_state is returned directly, not a copy.
  """
  return _cpplint_state.filters
    685 
    686 
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.

  Raises:
    ValueError: Propagated from _cpplint_state.SetFilters() when a filter
                does not start with '+' or '-'.
  """
  _cpplint_state.SetFilters(filters)
    698 
    699 
    700 class _FunctionState(object):
    701   """Tracks current function name and the number of lines in its body."""
    702 
    703   _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    704   _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    705 
    706   def __init__(self):
    707     self.in_a_function = False
    708     self.lines_in_function = 0
    709     self.current_function = ''
    710 
    711   def Begin(self, function_name):
    712     """Start analyzing function body.
    713 
    714     Args:
    715       function_name: The name of the function being tracked.
    716     """
    717     self.in_a_function = True
    718     self.lines_in_function = 0
    719     self.current_function = function_name
    720 
    721   def Count(self):
    722     """Count line in current function body."""
    723     if self.in_a_function:
    724       self.lines_in_function += 1
    725 
    726   def Check(self, error, filename, linenum):
    727     """Report if too many lines in function body.
    728 
    729     Args:
    730       error: The function to call with any errors found.
    731       filename: The name of the current file.
    732       linenum: The number of the line to check.
    733     """
    734     # BEGIN android-added
    735     if not self.in_a_function:
    736       return
    737     # END android-added
    738     if Match(r'T(EST|est)', self.current_function):
    739       base_trigger = self._TEST_TRIGGER
    740     else:
    741       base_trigger = self._NORMAL_TRIGGER
    742     trigger = base_trigger * 2**_VerboseLevel()
    743 
    744     if self.lines_in_function > trigger:
    745       error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    746       # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    747       if error_level > 5:
    748         error_level = 5
    749       error(filename, linenum, 'readability/fn_size', error_level,
    750             'Small and focused functions are preferred:'
    751             ' %s has %d non-comment lines'
    752             ' (error triggered by exceeding %d lines).'  % (
    753                 self.current_function, self.lines_in_function, trigger))
    754 
    755   def End(self):
    756     """Stop analyzing function body."""
    757     self.in_a_function = False
    758 
    759 
    760 class _IncludeError(Exception):
    761   """Indicates a problem with the include order in a file."""
    762   pass
    763 
    764 
    765 class FileInfo:
    766   """Provides utility functions for filenames.
    767 
    768   FileInfo provides easy access to the components of a file's path
    769   relative to the project root.
    770   """
    771 
    772   def __init__(self, filename):
    773     self._filename = filename
    774 
    775   def FullName(self):
    776     """Make Windows paths like Unix."""
    777     return os.path.abspath(self._filename).replace('\\', '/')
    778 
    779   def RepositoryName(self):
    780     """FullName after removing the local path to the repository.
    781 
    782     If we have a real absolute path name here we can try to do something smart:
    783     detecting the root of the checkout and truncating /path/to/checkout from
    784     the name so that we get header guards that don't include things like
    785     "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    786     people on different computers who have checked the source out to different
    787     locations won't see bogus errors.
    788     """
    789     fullname = self.FullName()
    790 
    791     if os.path.exists(fullname):
    792       project_dir = os.path.dirname(fullname)
    793 
    794       if os.path.exists(os.path.join(project_dir, ".svn")):
    795         # If there's a .svn file in the current directory, we recursively look
    796         # up the directory tree for the top of the SVN checkout
    797         root_dir = project_dir
    798         one_up_dir = os.path.dirname(root_dir)
    799         while os.path.exists(os.path.join(one_up_dir, ".svn")):
    800           root_dir = os.path.dirname(root_dir)
    801           one_up_dir = os.path.dirname(one_up_dir)
    802 
    803         prefix = os.path.commonprefix([root_dir, project_dir])
    804         return fullname[len(prefix) + 1:]
    805 
    806       # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
    807       # searching up from the current path.
    808       root_dir = os.path.dirname(fullname)
    809       while (root_dir != os.path.dirname(root_dir) and
    810              not os.path.exists(os.path.join(root_dir, ".git")) and
    811              not os.path.exists(os.path.join(root_dir, ".hg")) and
    812              not os.path.exists(os.path.join(root_dir, ".svn"))):
    813         root_dir = os.path.dirname(root_dir)
    814 
    815       if (os.path.exists(os.path.join(root_dir, ".git")) or
    816           os.path.exists(os.path.join(root_dir, ".hg")) or
    817           os.path.exists(os.path.join(root_dir, ".svn"))):
    818         prefix = os.path.commonprefix([root_dir, project_dir])
    819         # BEGIN android-changed
    820         # return fullname[len(prefix) + 1:]
    821         return "art/" + fullname[len(prefix) + 1:]
    822         # END android-changed
    823 
    824     # Don't know what to do; header guard warnings may be wrong...
    825     return fullname
    826 
    827   def Split(self):
    828     """Splits the file into the directory, basename, and extension.
    829 
    830     For 'chrome/browser/browser.cc', Split() would
    831     return ('chrome/browser', 'browser', '.cc')
    832 
    833     Returns:
    834       A tuple of (directory, basename, extension).
    835     """
    836 
    837     googlename = self.RepositoryName()
    838     project, rest = os.path.split(googlename)
    839     return (project,) + os.path.splitext(rest)
    840 
    841   def BaseName(self):
    842     """File base name - text after the final slash, before the final period."""
    843     return self.Split()[1]
    844 
    845   def Extension(self):
    846     """File extension - text following the final period."""
    847     return self.Split()[2]
    848 
    849   def NoExtension(self):
    850     """File has no source file extension."""
    851     return '/'.join(self.Split()[0:2])
    852 
    853   def IsSource(self):
    854     """File has a source file extension."""
    855     return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    856 
    857 
    858 def _ShouldPrintError(category, confidence, linenum):
    859   """If confidence >= verbose, category passes filter and is not suppressed."""
    860 
    861   # There are three ways we might decide not to print an error message:
    862   # a "NOLINT(category)" comment appears in the source,
    863   # the verbosity level isn't high enough, or the filters filter it out.
    864   if IsErrorSuppressedByNolint(category, linenum):
    865     return False
    866   if confidence < _cpplint_state.verbose_level:
    867     return False
    868 
    869   is_filtered = False
    870   for one_filter in _Filters():
    871     if one_filter.startswith('-'):
    872       if category.startswith(one_filter[1:]):
    873         is_filtered = True
    874     elif one_filter.startswith('+'):
    875       if category.startswith(one_filter[1:]):
    876         is_filtered = False
    877     else:
    878       assert False  # should have been checked for in SetFilter.
    879   if is_filtered:
    880     return False
    881 
    882   return True
    883 
    884 
    885 def Error(filename, linenum, category, confidence, message):
    886   """Logs the fact we've found a lint error.
    887 
    888   We log where the error was found, and also our confidence in the error,
    889   that is, how certain we are this is a legitimate style regression, and
    890   not a misidentification or a use that's sometimes justified.
    891 
    892   False positives can be suppressed by the use of
    893   "cpplint(category)"  comments on the offending line.  These are
    894   parsed into _error_suppressions.
    895 
    896   Args:
    897     filename: The name of the file containing the error.
    898     linenum: The number of the line containing the error.
    899     category: A string used to describe the "category" this bug
    900       falls under: "whitespace", say, or "runtime".  Categories
    901       may have a hierarchy separated by slashes: "whitespace/indent".
    902     confidence: A number from 1-5 representing a confidence score for
    903       the error, with 5 meaning that we are certain of the problem,
    904       and 1 meaning that it could be a legitimate construct.
    905     message: The error message.
    906   """
    907   if _ShouldPrintError(category, confidence, linenum):
    908     _cpplint_state.IncrementErrorCount(category)
    909     if _cpplint_state.output_format == 'vs7':
    910       sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' % (
    911           filename, linenum, message, category, confidence))
    912     elif _cpplint_state.output_format == 'eclipse':
    913       sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' % (
    914           filename, linenum, message, category, confidence))
    915     else:
    916       sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' % (
    917           filename, linenum, message, category, confidence))
    918 
    919 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that begin and end on the same line.
# This RE is a little bit more complicated than one might expect, because we
# also have to take care of removing the surrounding spaces so we can handle
# comments inside statements better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side;
# if this doesn't work we try on the left side, but only if there's a
# non-word character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    940 
    941 
    942 def IsCppString(line):
    943   """Does line terminate so, that the next symbol is in string constant.
    944 
    945   This function does not consider single-line nor multi-line comments.
    946 
    947   Args:
    948     line: is a partial line of code starting from the 0..n.
    949 
    950   Returns:
    951     True, if next character appended to 'line' is inside a
    952     string constant.
    953   """
    954 
    955   line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    956   return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    957 
    958 
    959 def FindNextMultiLineCommentStart(lines, lineix):
    960   """Find the beginning marker for a multiline comment."""
    961   while lineix < len(lines):
    962     if lines[lineix].strip().startswith('/*'):
    963       # Only return this marker if the comment goes beyond this line
    964       if lines[lineix].strip().find('*/', 2) < 0:
    965         return lineix
    966     lineix += 1
    967   return len(lines)
    968 
    969 
    970 def FindNextMultiLineCommentEnd(lines, lineix):
    971   """We are inside a comment, find the end marker."""
    972   while lineix < len(lines):
    973     if lines[lineix].strip().endswith('*/'):
    974       return lineix
    975     lineix += 1
    976   return len(lines)
    977 
    978 
    979 def RemoveMultiLineCommentsFromRange(lines, begin, end):
    980   """Clears a range of lines for multi-line comments."""
    981   # Having // dummy comments makes the lines non-empty, so we will not get
    982   # unnecessary blank line warnings later in the code.
    983   for i in range(begin, end):
    984     lines[i] = '// dummy'
    985 
    986 
    987 def RemoveMultiLineComments(filename, lines, error):
    988   """Removes multiline (c-style) comments from lines."""
    989   lineix = 0
    990   while lineix < len(lines):
    991     lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
    992     if lineix_begin >= len(lines):
    993       return
    994     lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
    995     if lineix_end >= len(lines):
    996       error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
    997             'Could not find end of multi-line comment')
    998       return
    999     RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
   1000     lineix = lineix_end + 1
   1001 
   1002 
   1003 def CleanseComments(line):
   1004   """Removes //-comments and single-line C-style /* */ comments.
   1005 
   1006   Args:
   1007     line: A line of C++ source.
   1008 
   1009   Returns:
   1010     The line with single-line comments removed.
   1011   """
   1012   commentpos = line.find('//')
   1013   if commentpos != -1 and not IsCppString(line[:commentpos]):
   1014     line = line[:commentpos].rstrip()
   1015   # get rid of /* ... */
   1016   return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
   1017 
   1018 
   1019 class CleansedLines(object):
   1020   """Holds 3 copies of all lines with different preprocessing applied to them.
   1021 
   1022   1) elided member contains lines without strings and comments,
   1023   2) lines member contains lines without comments, and
   1024   3) raw_lines member contains all the lines without processing.
   1025   All these three members are of <type 'list'>, and of the same length.
   1026   """
   1027 
   1028   def __init__(self, lines):
   1029     self.elided = []
   1030     self.lines = []
   1031     self.raw_lines = lines
   1032     self.num_lines = len(lines)
   1033     for linenum in range(len(lines)):
   1034       self.lines.append(CleanseComments(lines[linenum]))
   1035       elided = self._CollapseStrings(lines[linenum])
   1036       self.elided.append(CleanseComments(elided))
   1037 
   1038   def NumLines(self):
   1039     """Returns the number of lines represented."""
   1040     return self.num_lines
   1041 
   1042   @staticmethod
   1043   def _CollapseStrings(elided):
   1044     """Collapses strings and chars on a line to simple "" or '' blocks.
   1045 
   1046     We nix strings first so we're not fooled by text like '"http://"'
   1047 
   1048     Args:
   1049       elided: The line being processed.
   1050 
   1051     Returns:
   1052       The line with collapsed strings.
   1053     """
   1054     if not _RE_PATTERN_INCLUDE.match(elided):
   1055       # Remove escaped characters first to make quote/single quote collapsing
   1056       # basic.  Things that look like escaped characters shouldn't occur
   1057       # outside of strings and chars.
   1058       elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
   1059       elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
   1060       elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
   1061     return elided
   1062 
   1063 
   1064 def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
   1065   """Find the position just after the matching endchar.
   1066 
   1067   Args:
   1068     line: a CleansedLines line.
   1069     startpos: start searching at this position.
   1070     depth: nesting level at startpos.
   1071     startchar: expression opening character.
   1072     endchar: expression closing character.
   1073 
   1074   Returns:
   1075     Index just after endchar.
   1076   """
   1077   for i in xrange(startpos, len(line)):
   1078     if line[i] == startchar:
   1079       depth += 1
   1080     elif line[i] == endchar:
   1081       depth -= 1
   1082       if depth == 0:
   1083         return i + 1
   1084   return -1
   1085 
   1086 
   1087 def CloseExpression(clean_lines, linenum, pos):
   1088   """If input points to ( or { or [, finds the position that closes it.
   1089 
   1090   If lines[linenum][pos] points to a '(' or '{' or '[', finds the
   1091   linenum/pos that correspond to the closing of the expression.
   1092 
   1093   Args:
   1094     clean_lines: A CleansedLines instance containing the file.
   1095     linenum: The number of the line to check.
   1096     pos: A position on the line.
   1097 
   1098   Returns:
   1099     A tuple (line, linenum, pos) pointer *past* the closing brace, or
   1100     (line, len(lines), -1) if we never find a close.  Note we ignore
   1101     strings and comments when matching; and the line we return is the
   1102     'cleansed' line at linenum.
   1103   """
   1104 
   1105   line = clean_lines.elided[linenum]
   1106   startchar = line[pos]
   1107   if startchar not in '({[':
   1108     return (line, clean_lines.NumLines(), -1)
   1109   if startchar == '(': endchar = ')'
   1110   if startchar == '[': endchar = ']'
   1111   if startchar == '{': endchar = '}'
   1112 
   1113   # Check first line
   1114   end_pos = FindEndOfExpressionInLine(line, pos, 0, startchar, endchar)
   1115   if end_pos > -1:
   1116     return (line, linenum, end_pos)
   1117   tail = line[pos:]
   1118   num_open = tail.count(startchar) - tail.count(endchar)
   1119   while linenum < clean_lines.NumLines() - 1:
   1120     linenum += 1
   1121     line = clean_lines.elided[linenum]
   1122     delta = line.count(startchar) - line.count(endchar)
   1123     if num_open + delta <= 0:
   1124       return (line, linenum,
   1125               FindEndOfExpressionInLine(line, 0, num_open, startchar, endchar))
   1126     num_open += delta
   1127 
   1128   # Did not find endchar before end of file, give up
   1129   return (line, clean_lines.NumLines(), -1)
   1130 
   1131 def CheckForCopyright(filename, lines, error):
   1132   """Logs an error if no Copyright message appears at the top of the file."""
   1133 
   1134   # We'll say it should occur by line 10. Don't forget there's a
   1135   # dummy line at the front.
   1136   for line in xrange(1, min(len(lines), 11)):
   1137     if re.search(r'Copyright', lines[line], re.I): break
   1138   else:                       # means no copyright line was found
   1139     error(filename, 0, 'legal/copyright', 5,
   1140           'No copyright message found.  '
   1141           'You should have a line: "Copyright [year] <Copyright Owner>"')
   1142 
   1143 
   1144 def GetHeaderGuardCPPVariable(filename):
   1145   """Returns the CPP variable that should be used as a header guard.
   1146 
   1147   Args:
   1148     filename: The name of a C++ header file.
   1149 
   1150   Returns:
   1151     The CPP variable that should be used as a header guard in the
   1152     named file.
   1153 
   1154   """
   1155 
   1156   # Restores original filename in case that cpplint is invoked from Emacs's
   1157   # flymake.
   1158   filename = re.sub(r'_flymake\.h$', '.h', filename)
   1159   filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
   1160 
   1161   fileinfo = FileInfo(filename)
   1162   file_path_from_root = fileinfo.RepositoryName()
   1163   if _root:
   1164     file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
   1165   return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
   1166 
   1167 
   1168 def CheckForHeaderGuard(filename, lines, error):
   1169   """Checks that the file contains a header guard.
   1170 
   1171   Logs an error if no #ifndef header guard is present.  For other
   1172   headers, checks that the full pathname is used.
   1173 
   1174   Args:
   1175     filename: The name of the C++ header file.
   1176     lines: An array of strings, each representing a line of the file.
   1177     error: The function to call with any errors found.
   1178   """
   1179 
   1180   cppvar = GetHeaderGuardCPPVariable(filename)
   1181 
   1182   ifndef = None
   1183   ifndef_linenum = 0
   1184   define = None
   1185   endif = None
   1186   endif_linenum = 0
   1187   for linenum, line in enumerate(lines):
   1188     linesplit = line.split()
   1189     if len(linesplit) >= 2:
   1190       # find the first occurrence of #ifndef and #define, save arg
   1191       if not ifndef and linesplit[0] == '#ifndef':
   1192         # set ifndef to the header guard presented on the #ifndef line.
   1193         ifndef = linesplit[1]
   1194         ifndef_linenum = linenum
   1195       if not define and linesplit[0] == '#define':
   1196         define = linesplit[1]
   1197     # find the last occurrence of #endif, save entire line
   1198     if line.startswith('#endif'):
   1199       endif = line
   1200       endif_linenum = linenum
   1201 
   1202   if not ifndef:
   1203     error(filename, 0, 'build/header_guard', 5,
   1204           'No #ifndef header guard found, suggested CPP variable is: %s' %
   1205           cppvar)
   1206     return
   1207 
   1208   if not define:
   1209     error(filename, 0, 'build/header_guard', 5,
   1210           'No #define header guard found, suggested CPP variable is: %s' %
   1211           cppvar)
   1212     return
   1213 
   1214   # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
   1215   # for backward compatibility.
   1216   if ifndef != cppvar:
   1217     error_level = 0
   1218     if ifndef != cppvar + '_':
   1219       error_level = 5
   1220 
   1221     ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
   1222                             error)
   1223     error(filename, ifndef_linenum, 'build/header_guard', error_level,
   1224           '#ifndef header guard has wrong style, please use: %s' % cppvar)
   1225 
   1226   if define != ifndef:
   1227     error(filename, 0, 'build/header_guard', 5,
   1228           '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
   1229           cppvar)
   1230     return
   1231 
   1232   if endif != ('#endif  // %s' % cppvar):
   1233     error_level = 0
   1234     if endif != ('#endif  // %s' % (cppvar + '_')):
   1235       error_level = 5
   1236 
   1237     ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
   1238                             error)
   1239     error(filename, endif_linenum, 'build/header_guard', error_level,
   1240           '#endif line should be "#endif  // %s"' % cppvar)
   1241 
   1242 
   1243 def CheckForUnicodeReplacementCharacters(filename, lines, error):
   1244   """Logs an error for each line containing Unicode replacement characters.
   1245 
   1246   These indicate that either the file contained invalid UTF-8 (likely)
   1247   or Unicode replacement characters (which it shouldn't).  Note that
   1248   it's possible for this to throw off line numbering if the invalid
   1249   UTF-8 occurred adjacent to a newline.
   1250 
   1251   Args:
   1252     filename: The name of the current file.
   1253     lines: An array of strings, each representing a line of the file.
   1254     error: The function to call with any errors found.
   1255   """
   1256   for linenum, line in enumerate(lines):
   1257     if u'\ufffd' in line:
   1258       error(filename, linenum, 'readability/utf8', 5,
   1259             'Line contains invalid UTF-8 (or Unicode replacement character).')
   1260 
   1261 
   1262 def CheckForNewlineAtEOF(filename, lines, error):
   1263   """Logs an error if there is no newline char at the end of the file.
   1264 
   1265   Args:
   1266     filename: The name of the current file.
   1267     lines: An array of strings, each representing a line of the file.
   1268     error: The function to call with any errors found.
   1269   """
   1270 
   1271   # The array lines() was created by adding two newlines to the
   1272   # original file (go figure), then splitting on \n.
   1273   # To verify that the file ends in \n, we just have to make sure the
   1274   # last-but-two element of lines() exists and is empty.
   1275   if len(lines) < 3 or lines[-2]:
   1276     error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
   1277           'Could not find a newline character at the end of the file.')
   1278 
   1279 
   1280 def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
   1281   """Logs an error if we see /* ... */ or "..." that extend past one line.
   1282 
   1283   /* ... */ comments are legit inside macros, for one line.
   1284   Otherwise, we prefer // comments, so it's ok to warn about the
   1285   other.  Likewise, it's ok for strings to extend across multiple
   1286   lines, as long as a line continuation character (backslash)
   1287   terminates each line. Although not currently prohibited by the C++
   1288   style guide, it's ugly and unnecessary. We don't do well with either
   1289   in this lint program, so we warn about both.
   1290 
   1291   Args:
   1292     filename: The name of the current file.
   1293     clean_lines: A CleansedLines instance containing the file.
   1294     linenum: The number of the line to check.
   1295     error: The function to call with any errors found.
   1296   """
   1297   line = clean_lines.elided[linenum]
   1298 
   1299   # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1300   # second (escaped) slash may trigger later \" detection erroneously.
   1301   line = line.replace('\\\\', '')
   1302 
   1303   if line.count('/*') > line.count('*/'):
   1304     error(filename, linenum, 'readability/multiline_comment', 5,
   1305           'Complex multi-line /*...*/-style comment found. '
   1306           'Lint may give bogus warnings.  '
   1307           'Consider replacing these with //-style comments, '
   1308           'with #if 0...#endif, '
   1309           'or with more clearly structured multi-line comments.')
   1310 
   1311   if (line.count('"') - line.count('\\"')) % 2:
   1312     error(filename, linenum, 'readability/multiline_string', 5,
   1313           'Multi-line string ("...") found.  This lint script doesn\'t '
   1314           'do well with such strings, and may give bogus warnings.  They\'re '
   1315           'ugly and unnecessary, and you should use concatenation instead".')
   1316 
   1317 
   1318 threading_list = (
   1319     ('asctime(', 'asctime_r('),
   1320     ('ctime(', 'ctime_r('),
   1321     ('getgrgid(', 'getgrgid_r('),
   1322     ('getgrnam(', 'getgrnam_r('),
   1323     ('getlogin(', 'getlogin_r('),
   1324     ('getpwnam(', 'getpwnam_r('),
   1325     ('getpwuid(', 'getpwuid_r('),
   1326     ('gmtime(', 'gmtime_r('),
   1327     ('localtime(', 'localtime_r('),
   1328     ('rand(', 'rand_r('),
   1329     ('readdir(', 'readdir_r('),
   1330     ('strtok(', 'strtok_r('),
   1331     ('ttyname(', 'ttyname_r('),
   1332     )
   1333 
   1334 
   1335 def CheckPosixThreading(filename, clean_lines, linenum, error):
   1336   """Checks for calls to thread-unsafe functions.
   1337 
   1338   Much code has been originally written without consideration of
   1339   multi-threading. Also, engineers are relying on their old experience;
   1340   they have learned posix before threading extensions were added. These
   1341   tests guide the engineers to use thread-safe functions (when using
   1342   posix directly).
   1343 
   1344   Args:
   1345     filename: The name of the current file.
   1346     clean_lines: A CleansedLines instance containing the file.
   1347     linenum: The number of the line to check.
   1348     error: The function to call with any errors found.
   1349   """
   1350   line = clean_lines.elided[linenum]
   1351   for single_thread_function, multithread_safe_function in threading_list:
   1352     ix = line.find(single_thread_function)
   1353     # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   1354     if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
   1355                                 line[ix - 1] not in ('_', '.', '>'))):
   1356       error(filename, linenum, 'runtime/threadsafe_fn', 2,
   1357             'Consider using ' + multithread_safe_function +
   1358             '...) instead of ' + single_thread_function +
   1359             '...) for improved thread safety.')
   1360 
   1361 
   1362 # Matches invalid increment: *count++, which moves pointer instead of
   1363 # incrementing a value.
   1364 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   1365     r'^\s*\*\w+(\+\+|--);')
   1366 
   1367 
   1368 def CheckInvalidIncrement(filename, clean_lines, linenum, error):
   1369   """Checks for invalid increment *count++.
   1370 
   1371   For example following function:
   1372   void increment_counter(int* count) {
   1373     *count++;
   1374   }
   1375   is invalid, because it effectively does count++, moving pointer, and should
   1376   be replaced with ++*count, (*count)++ or *count += 1.
   1377 
   1378   Args:
   1379     filename: The name of the current file.
   1380     clean_lines: A CleansedLines instance containing the file.
   1381     linenum: The number of the line to check.
   1382     error: The function to call with any errors found.
   1383   """
   1384   line = clean_lines.elided[linenum]
   1385   if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1386     error(filename, linenum, 'runtime/invalid_increment', 5,
   1387           'Changing pointer instead of value (or unused value of operator*).')
   1388 
   1389 
   1390 class _BlockInfo(object):
   1391   """Stores information about a generic block of code."""
   1392 
   1393   def __init__(self, seen_open_brace):
   1394     self.seen_open_brace = seen_open_brace
   1395     self.open_parentheses = 0
   1396     self.inline_asm = _NO_ASM
   1397 
   1398   def CheckBegin(self, filename, clean_lines, linenum, error):
   1399     """Run checks that applies to text up to the opening brace.
   1400 
   1401     This is mostly for checking the text after the class identifier
   1402     and the "{", usually where the base class is specified.  For other
   1403     blocks, there isn't much to check, so we always pass.
   1404 
   1405     Args:
   1406       filename: The name of the current file.
   1407       clean_lines: A CleansedLines instance containing the file.
   1408       linenum: The number of the line to check.
   1409       error: The function to call with any errors found.
   1410     """
   1411     pass
   1412 
   1413   def CheckEnd(self, filename, clean_lines, linenum, error):
   1414     """Run checks that applies to text after the closing brace.
   1415 
   1416     This is mostly used for checking end of namespace comments.
   1417 
   1418     Args:
   1419       filename: The name of the current file.
   1420       clean_lines: A CleansedLines instance containing the file.
   1421       linenum: The number of the line to check.
   1422       error: The function to call with any errors found.
   1423     """
   1424     pass
   1425 
   1426 
   1427 class _ClassInfo(_BlockInfo):
   1428   """Stores information about a class."""
   1429 
   1430   def __init__(self, name, class_or_struct, clean_lines, linenum):
   1431     _BlockInfo.__init__(self, False)
   1432     self.name = name
   1433     self.starting_linenum = linenum
   1434     self.is_derived = False
   1435     if class_or_struct == 'struct':
   1436       self.access = 'public'
   1437     else:
   1438       self.access = 'private'
   1439 
   1440     # Try to find the end of the class.  This will be confused by things like:
   1441     #   class A {
   1442     #   } *x = { ...
   1443     #
   1444     # But it's still good enough for CheckSectionSpacing.
   1445     self.last_line = 0
   1446     depth = 0
   1447     for i in range(linenum, clean_lines.NumLines()):
   1448       line = clean_lines.elided[i]
   1449       depth += line.count('{') - line.count('}')
   1450       if not depth:
   1451         self.last_line = i
   1452         break
   1453 
   1454   def CheckBegin(self, filename, clean_lines, linenum, error):
   1455     # Look for a bare ':'
   1456     if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
   1457       self.is_derived = True
   1458 
   1459 
   1460 class _NamespaceInfo(_BlockInfo):
   1461   """Stores information about a namespace."""
   1462 
   1463   def __init__(self, name, linenum):
   1464     _BlockInfo.__init__(self, False)
   1465     self.name = name or ''
   1466     self.starting_linenum = linenum
   1467 
   1468   def CheckEnd(self, filename, clean_lines, linenum, error):
   1469     """Check end of namespace comments."""
   1470     line = clean_lines.raw_lines[linenum]
   1471 
   1472     # Check how many lines is enclosed in this namespace.  Don't issue
   1473     # warning for missing namespace comments if there aren't enough
   1474     # lines.  However, do apply checks if there is already an end of
   1475     # namespace comment and it's incorrect.
   1476     #
   1477     # TODO(unknown): We always want to check end of namespace comments
   1478     # if a namespace is large, but sometimes we also want to apply the
   1479     # check if a short namespace contained nontrivial things (something
   1480     # other than forward declarations).  There is currently no logic on
   1481     # deciding what these nontrivial things are, so this check is
   1482     # triggered by namespace size only, which works most of the time.
   1483     if (linenum - self.starting_linenum < 10
   1484         and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
   1485       return
   1486 
   1487     # Look for matching comment at end of namespace.
   1488     #
   1489     # Note that we accept C style "/* */" comments for terminating
   1490     # namespaces, so that code that terminate namespaces inside
   1491     # preprocessor macros can be cpplint clean.  Example: http://go/nxpiz
   1492     #
   1493     # We also accept stuff like "// end of namespace <name>." with the
   1494     # period at the end.
   1495     #
   1496     # Besides these, we don't accept anything else, otherwise we might
   1497     # get false negatives when existing comment is a substring of the
   1498     # expected namespace.  Example: http://go/ldkdc, http://cl/23548205
   1499     if self.name:
   1500       # Named namespace
   1501       if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
   1502                     r'[\*/\.\\\s]*$'),
   1503                    line):
   1504         error(filename, linenum, 'readability/namespace', 5,
   1505               'Namespace should be terminated with "// namespace %s"' %
   1506               self.name)
   1507     else:
   1508       # Anonymous namespace
   1509       if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
   1510         error(filename, linenum, 'readability/namespace', 5,
   1511               'Namespace should be terminated with "// namespace"')
   1512 
   1513 
   1514 class _PreprocessorInfo(object):
   1515   """Stores checkpoints of nesting stacks when #if/#else is seen."""
   1516 
   1517   def __init__(self, stack_before_if):
   1518     # The entire nesting stack before #if
   1519     self.stack_before_if = stack_before_if
   1520 
   1521     # The entire nesting stack up to #else
   1522     self.stack_before_else = []
   1523 
   1524     # Whether we have already seen #else or #elif
   1525     self.seen_else = False
   1526 
   1527 
   1528 class _NestingState(object):
   1529   """Holds states related to parsing braces."""
   1530 
   1531   def __init__(self):
   1532     # Stack for tracking all braces.  An object is pushed whenever we
   1533     # see a "{", and popped when we see a "}".  Only 3 types of
   1534     # objects are possible:
   1535     # - _ClassInfo: a class or struct.
   1536     # - _NamespaceInfo: a namespace.
   1537     # - _BlockInfo: some other type of block.
   1538     self.stack = []
   1539 
   1540     # Stack of _PreprocessorInfo objects.
   1541     self.pp_stack = []
   1542 
   1543   def SeenOpenBrace(self):
   1544     """Check if we have seen the opening brace for the innermost block.
   1545 
   1546     Returns:
   1547       True if we have seen the opening brace, False if the innermost
   1548       block is still expecting an opening brace.
   1549     """
   1550     return (not self.stack) or self.stack[-1].seen_open_brace
   1551 
   1552   def InNamespaceBody(self):
   1553     """Check if we are currently one level inside a namespace body.
   1554 
   1555     Returns:
   1556       True if top of the stack is a namespace block, False otherwise.
   1557     """
   1558     return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
   1559 
   1560   def UpdatePreprocessor(self, line):
   1561     """Update preprocessor stack.
   1562 
   1563     We need to handle preprocessors due to classes like this:
   1564       #ifdef SWIG
   1565       struct ResultDetailsPageElementExtensionPoint {
   1566       #else
   1567       struct ResultDetailsPageElementExtensionPoint : public Extension {
   1568       #endif
   1569     (see http://go/qwddn for original example)
   1570 
   1571     We make the following assumptions (good enough for most files):
   1572     - Preprocessor condition evaluates to true from #if up to first
   1573       #else/#elif/#endif.
   1574 
   1575     - Preprocessor condition evaluates to false from #else/#elif up
   1576       to #endif.  We still perform lint checks on these lines, but
   1577       these do not affect nesting stack.
   1578 
   1579     Args:
   1580       line: current line to check.
   1581     """
   1582     if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
   1583       # Beginning of #if block, save the nesting stack here.  The saved
   1584       # stack will allow us to restore the parsing state in the #else case.
   1585       self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
   1586     elif Match(r'^\s*#\s*(else|elif)\b', line):
   1587       # Beginning of #else block
   1588       if self.pp_stack:
   1589         if not self.pp_stack[-1].seen_else:
   1590           # This is the first #else or #elif block.  Remember the
   1591           # whole nesting stack up to this point.  This is what we
   1592           # keep after the #endif.
   1593           self.pp_stack[-1].seen_else = True
   1594           self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
   1595 
   1596         # Restore the stack to how it was before the #if
   1597         self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
   1598       else:
   1599         # TODO(unknown): unexpected #else, issue warning?
   1600         pass
   1601     elif Match(r'^\s*#\s*endif\b', line):
   1602       # End of #if or #else blocks.
   1603       if self.pp_stack:
   1604         # If we saw an #else, we will need to restore the nesting
   1605         # stack to its former state before the #else, otherwise we
   1606         # will just continue from where we left off.
   1607         if self.pp_stack[-1].seen_else:
   1608           # Here we can just use a shallow copy since we are the last
   1609           # reference to it.
   1610           self.stack = self.pp_stack[-1].stack_before_else
   1611         # Drop the corresponding #if
   1612         self.pp_stack.pop()
   1613       else:
   1614         # TODO(unknown): unexpected #endif, issue warning?
   1615         pass
   1616 
   1617   def Update(self, filename, clean_lines, linenum, error):
   1618     """Update nesting state with current line.
   1619 
   1620     Args:
   1621       filename: The name of the current file.
   1622       clean_lines: A CleansedLines instance containing the file.
   1623       linenum: The number of the line to check.
   1624       error: The function to call with any errors found.
   1625     """
   1626     line = clean_lines.elided[linenum]
   1627 
   1628     # Update pp_stack first
   1629     self.UpdatePreprocessor(line)
   1630 
   1631     # Count parentheses.  This is to avoid adding struct arguments to
   1632     # the nesting stack.
   1633     if self.stack:
   1634       inner_block = self.stack[-1]
   1635       depth_change = line.count('(') - line.count(')')
   1636       inner_block.open_parentheses += depth_change
   1637 
   1638       # Also check if we are starting or ending an inline assembly block.
   1639       if inner_block.inline_asm in (_NO_ASM, _END_ASM):
   1640         if (depth_change != 0 and
   1641             inner_block.open_parentheses == 1 and
   1642             _MATCH_ASM.match(line)):
   1643           # Enter assembly block
   1644           inner_block.inline_asm = _INSIDE_ASM
   1645         else:
   1646           # Not entering assembly block.  If previous line was _END_ASM,
   1647           # we will now shift to _NO_ASM state.
   1648           inner_block.inline_asm = _NO_ASM
   1649       elif (inner_block.inline_asm == _INSIDE_ASM and
   1650             inner_block.open_parentheses == 0):
   1651         # Exit assembly block
   1652         inner_block.inline_asm = _END_ASM
   1653 
   1654     # Consume namespace declaration at the beginning of the line.  Do
   1655     # this in a loop so that we catch same line declarations like this:
   1656     #   namespace proto2 { namespace bridge { class MessageSet; } }
   1657     while True:
   1658       # Match start of namespace.  The "\b\s*" below catches namespace
   1659       # declarations even if it weren't followed by a whitespace, this
   1660       # is so that we don't confuse our namespace checker.  The
   1661       # missing spaces will be flagged by CheckSpacing.
   1662       namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
   1663       if not namespace_decl_match:
   1664         break
   1665 
   1666       new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
   1667       self.stack.append(new_namespace)
   1668 
   1669       line = namespace_decl_match.group(2)
   1670       if line.find('{') != -1:
   1671         new_namespace.seen_open_brace = True
   1672         line = line[line.find('{') + 1:]
   1673 
   1674     # Look for a class declaration in whatever is left of the line
   1675     # after parsing namespaces.  The regexp accounts for decorated classes
   1676     # such as in:
   1677     #   class LOCKABLE API Object {
   1678     #   };
   1679     #
   1680     # Templates with class arguments may confuse the parser, for example:
   1681     #   template <class T
   1682     #             class Comparator = less<T>,
   1683     #             class Vector = vector<T> >
   1684     #   class HeapQueue {
   1685     #
   1686     # Because this parser has no nesting state about templates, by the
   1687     # time it saw "class Comparator", it may think that it's a new class.
   1688     # Nested templates have a similar problem:
   1689     #   template <
   1690     #       typename ExportedType,
   1691     #       typename TupleType,
   1692     #       template <typename, typename> class ImplTemplate>
   1693     #
   1694     # To avoid these cases, we ignore classes that are followed by '=' or '>'
   1695     class_decl_match = Match(
   1696         r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
   1697         '(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
   1698         '(([^=>]|<[^<>]*>)*)$', line)
   1699     if (class_decl_match and
   1700         (not self.stack or self.stack[-1].open_parentheses == 0)):
   1701       self.stack.append(_ClassInfo(
   1702           class_decl_match.group(4), class_decl_match.group(2),
   1703           clean_lines, linenum))
   1704       line = class_decl_match.group(5)
   1705 
   1706     # If we have not yet seen the opening brace for the innermost block,
   1707     # run checks here.
   1708     if not self.SeenOpenBrace():
   1709       self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
   1710 
   1711     # Update access control if we are inside a class/struct
   1712     if self.stack and isinstance(self.stack[-1], _ClassInfo):
   1713       access_match = Match(r'\s*(public|private|protected)\s*:', line)
   1714       if access_match:
   1715         self.stack[-1].access = access_match.group(1)
   1716 
   1717     # Consume braces or semicolons from what's left of the line
   1718     while True:
   1719       # Match first brace, semicolon, or closed parenthesis.
   1720       matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
   1721       if not matched:
   1722         break
   1723 
   1724       token = matched.group(1)
   1725       if token == '{':
   1726         # If namespace or class hasn't seen a opening brace yet, mark
   1727         # namespace/class head as complete.  Push a new block onto the
   1728         # stack otherwise.
   1729         if not self.SeenOpenBrace():
   1730           self.stack[-1].seen_open_brace = True
   1731         else:
   1732           self.stack.append(_BlockInfo(True))
   1733           if _MATCH_ASM.match(line):
   1734             self.stack[-1].inline_asm = _BLOCK_ASM
   1735       elif token == ';' or token == ')':
   1736         # If we haven't seen an opening brace yet, but we already saw
   1737         # a semicolon, this is probably a forward declaration.  Pop
   1738         # the stack for these.
   1739         #
   1740         # Similarly, if we haven't seen an opening brace yet, but we
   1741         # already saw a closing parenthesis, then these are probably
   1742         # function arguments with extra "class" or "struct" keywords.
   1743         # Also pop these stack for these.
   1744         if not self.SeenOpenBrace():
   1745           self.stack.pop()
   1746       else:  # token == '}'
   1747         # Perform end of block checks and pop the stack.
   1748         if self.stack:
   1749           self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
   1750           self.stack.pop()
   1751       line = matched.group(2)
   1752 
   1753   def InnermostClass(self):
   1754     """Get class info on the top of the stack.
   1755 
   1756     Returns:
   1757       A _ClassInfo object if we are inside a class, or None otherwise.
   1758     """
   1759     for i in range(len(self.stack), 0, -1):
   1760       classinfo = self.stack[i - 1]
   1761       if isinstance(classinfo, _ClassInfo):
   1762         return classinfo
   1763     return None
   1764 
   1765   def CheckClassFinished(self, filename, error):
   1766     """Checks that all classes have been completely parsed.
   1767 
   1768     Call this when all lines in a file have been processed.
   1769     Args:
   1770       filename: The name of the current file.
   1771       error: The function to call with any errors found.
   1772     """
   1773     # Note: This test can result in false positives if #ifdef constructs
   1774     # get in the way of brace matching. See the testBuildClass test in
   1775     # cpplint_unittest.py for an example of this.
   1776     for obj in self.stack:
   1777       if isinstance(obj, _ClassInfo):
   1778         error(filename, obj.starting_linenum, 'build/class', 5,
   1779               'Failed to find complete declaration of class %s' %
   1780               obj.name)
   1781 
   1782 
   1783 def CheckForNonStandardConstructs(filename, clean_lines, linenum,
   1784                                   nesting_state, error):
   1785   """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   1786 
   1787   Complain about several constructs which gcc-2 accepts, but which are
   1788   not standard C++.  Warning about these in lint is one way to ease the
   1789   transition to new compilers.
   1790   - put storage class first (e.g. "static const" instead of "const static").
   1791   - "%lld" instead of %qd" in printf-type functions.
   1792   - "%1$d" is non-standard in printf-type functions.
   1793   - "\%" is an undefined character escape sequence.
   1794   - text after #endif is not allowed.
   1795   - invalid inner-style forward declaration.
   1796   - >? and <? operators, and their >?= and <?= cousins.
   1797 
   1798   Additionally, check for constructor/destructor style violations and reference
   1799   members, as it is very convenient to do so while checking for
   1800   gcc-2 compliance.
   1801 
   1802   Args:
   1803     filename: The name of the current file.
   1804     clean_lines: A CleansedLines instance containing the file.
   1805     linenum: The number of the line to check.
   1806     nesting_state: A _NestingState instance which maintains information about
   1807                    the current stack of nested blocks being parsed.
   1808     error: A callable to which errors are reported, which takes 4 arguments:
   1809            filename, line number, error level, and message
   1810   """
   1811 
   1812   # Remove comments from the line, but leave in strings for now.
   1813   line = clean_lines.lines[linenum]
   1814 
   1815   if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   1816     error(filename, linenum, 'runtime/printf_format', 3,
   1817           '%q in format strings is deprecated.  Use %ll instead.')
   1818 
   1819   if Search(r'printf\s*\(.*".*%\d+\$', line):
   1820     error(filename, linenum, 'runtime/printf_format', 2,
   1821           '%N$ formats are unconventional.  Try rewriting to avoid them.')
   1822 
   1823   # Remove escaped backslashes before looking for undefined escapes.
   1824   line = line.replace('\\\\', '')
   1825 
   1826   if Search(r'("|\').*\\(%|\[|\(|{)', line):
   1827     error(filename, linenum, 'build/printf_format', 3,
   1828           '%, [, (, and { are undefined character escapes.  Unescape them.')
   1829 
   1830   # For the rest, work with both comments and strings removed.
   1831   line = clean_lines.elided[linenum]
   1832 
   1833   if Search(r'\b(const|volatile|void|char|short|int|long'
   1834             r'|float|double|signed|unsigned'
   1835             r'|schar|u?int8|u?int16|u?int32|u?int64)'
   1836             r'\s+(register|static|extern|typedef)\b',
   1837             line):
   1838     error(filename, linenum, 'build/storage_class', 5,
   1839           'Storage class (static, extern, typedef, etc) should be first.')
   1840 
   1841   if Match(r'\s*#\s*endif\s*[^/\s]+', line):
   1842     error(filename, linenum, 'build/endif_comment', 5,
   1843           'Uncommented text after #endif is non-standard.  Use a comment.')
   1844 
   1845   if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   1846     error(filename, linenum, 'build/forward_decl', 5,
   1847           'Inner-style forward declarations are invalid.  Remove this line.')
   1848 
   1849   if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
   1850             line):
   1851     error(filename, linenum, 'build/deprecated', 3,
   1852           '>? and <? (max and min) operators are non-standard and deprecated.')
   1853 
   1854   if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
   1855     # TODO(unknown): Could it be expanded safely to arbitrary references,
   1856     # without triggering too many false positives? The first
   1857     # attempt triggered 5 warnings for mostly benign code in the regtest, hence
   1858     # the restriction.
   1859     # Here's the original regexp, for the reference:
   1860     # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
   1861     # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
   1862     error(filename, linenum, 'runtime/member_string_references', 2,
   1863           'const string& members are dangerous. It is much better to use '
   1864           'alternatives, such as pointers or simple constants.')
   1865 
   1866   # Everything else in this function operates on class declarations.
   1867   # Return early if the top of the nesting stack is not a class, or if
   1868   # the class head is not completed yet.
   1869   classinfo = nesting_state.InnermostClass()
   1870   if not classinfo or not classinfo.seen_open_brace:
   1871     return
   1872 
   1873   # The class may have been declared with namespace or classname qualifiers.
   1874   # The constructor and destructor will not have those qualifiers.
   1875   base_classname = classinfo.name.split('::')[-1]
   1876 
   1877   # Look for single-argument constructors that aren't marked explicit.
   1878   # Technically a valid construct, but against style.
   1879   args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
   1880                % re.escape(base_classname),
   1881                line)
   1882   if (args and
   1883       args.group(1) != 'void' and
   1884       not Match(r'(const\s+)?%s\s*(?:<\w+>\s*)?&' % re.escape(base_classname),
   1885                 args.group(1).strip())):
   1886     error(filename, linenum, 'runtime/explicit', 5,
   1887           'Single-argument constructors should be marked explicit.')
   1888 
   1889 
   1890 def CheckSpacingForFunctionCall(filename, line, linenum, error):
   1891   """Checks for the correctness of various spacing around function calls.
   1892 
   1893   Args:
   1894     filename: The name of the current file.
   1895     line: The text of the line to check.
   1896     linenum: The number of the line to check.
   1897     error: The function to call with any errors found.
   1898   """
   1899 
   1900   # Since function calls often occur inside if/for/while/switch
   1901   # expressions - which have their own, more liberal conventions - we
   1902   # first see if we should be looking inside such an expression for a
   1903   # function call, to which we can apply more strict standards.
   1904   fncall = line    # if there's no control flow construct, look at whole line
   1905   for pattern in (r'\bif\s*\((.*)\)\s*{',
   1906                   r'\bfor\s*\((.*)\)\s*{',
   1907                   r'\bwhile\s*\((.*)\)\s*[{;]',
   1908                   r'\bswitch\s*\((.*)\)\s*{'):
   1909     match = Search(pattern, line)
   1910     if match:
   1911       fncall = match.group(1)    # look inside the parens for function calls
   1912       break
   1913 
   1914   # Except in if/for/while/switch, there should never be space
   1915   # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1916   # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1917   # a space before a ( when it's a function argument.  I assume it's a
   1918   # function argument when the char before the whitespace is legal in
   1919   # a function name (alnum + _) and we're not starting a macro. Also ignore
   1920   # pointers and references to arrays and functions coz they're too tricky:
   1921   # we use a very simple way to recognize these:
   1922   # " (something)(maybe-something)" or
   1923   # " (something)(maybe-something," or
   1924   # " (something)[something]"
   1925   # Note that we assume the contents of [] to be short enough that
   1926   # they'll never need to wrap.
   1927   if (  # Ignore control structures.
   1928       # BEGIN android-changed
   1929       # not Search(r'\b(if|for|while|switch|return|delete)\b', fncall) and
   1930       not Search(r'\b(if|for|while|switch|return|delete|new)\b', fncall) and
   1931       # END android-changed
   1932       # Ignore pointers/references to functions.
   1933       not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
   1934       # Ignore pointers/references to arrays.
   1935       not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
   1936     if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
   1937       error(filename, linenum, 'whitespace/parens', 4,
   1938             'Extra space after ( in function call')
   1939     elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
   1940       error(filename, linenum, 'whitespace/parens', 2,
   1941             'Extra space after (')
   1942     if (Search(r'\w\s+\(', fncall) and
   1943         not Search(r'#\s*define|typedef', fncall) and
   1944         not Search(r'\w\s+\((\w+::)?\*\w+\)\(', fncall)):
   1945       error(filename, linenum, 'whitespace/parens', 4,
   1946             'Extra space before ( in function call')
   1947     # If the ) is followed only by a newline or a { + newline, assume it's
   1948     # part of a control statement (if/while/etc), and don't complain
   1949     if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
   1950       # If the closing parenthesis is preceded by only whitespaces,
   1951       # try to give a more descriptive error message.
   1952       if Search(r'^\s+\)', fncall):
   1953         error(filename, linenum, 'whitespace/parens', 2,
   1954               'Closing ) should be moved to the previous line')
   1955       else:
   1956         error(filename, linenum, 'whitespace/parens', 2,
   1957               'Extra space before )')
   1958 
   1959 
   1960 def IsBlankLine(line):
   1961   """Returns true if the given line is blank.
   1962 
   1963   We consider a line to be blank if the line is empty or consists of
   1964   only white spaces.
   1965 
   1966   Args:
   1967     line: A line of a string.
   1968 
   1969   Returns:
   1970     True, if the given line is blank.
   1971   """
   1972   return not line or line.isspace()
   1973 
   1974 
   1975 def CheckForFunctionLengths(filename, clean_lines, linenum,
   1976                             function_state, error):
   1977   """Reports for long function bodies.
   1978 
   1979   For an overview why this is done, see:
   1980   http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1981 
   1982   Uses a simplistic algorithm assuming other style guidelines
   1983   (especially spacing) are followed.
   1984   Only checks unindented functions, so class members are unchecked.
   1985   Trivial bodies are unchecked, so constructors with huge initializer lists
   1986   may be missed.
   1987   Blank/comment lines are not counted so as to avoid encouraging the removal
   1988   of vertical space and comments just to get through a lint check.
   1989   NOLINT *on the last line of a function* disables this check.
   1990 
   1991   Args:
   1992     filename: The name of the current file.
   1993     clean_lines: A CleansedLines instance containing the file.
   1994     linenum: The number of the line to check.
   1995     function_state: Current function name and lines in body so far.
   1996     error: The function to call with any errors found.
   1997   """
   1998   lines = clean_lines.lines
   1999   line = lines[linenum]
   2000   raw = clean_lines.raw_lines
   2001   raw_line = raw[linenum]
   2002   joined_line = ''
   2003 
   2004   starting_func = False
   2005   regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
   2006   match_result = Match(regexp, line)
   2007   if match_result:
   2008     # If the name is all caps and underscores, figure it's a macro and
   2009     # ignore it, unless it's TEST or TEST_F.
   2010     function_name = match_result.group(1).split()[-1]
   2011     if function_name == 'TEST' or function_name == 'TEST_F' or (
   2012         not Match(r'[A-Z_]+$', function_name)):
   2013       starting_func = True
   2014 
   2015   if starting_func:
   2016     body_found = False
   2017     for start_linenum in xrange(linenum, clean_lines.NumLines()):
   2018       start_line = lines[start_linenum]
   2019       joined_line += ' ' + start_line.lstrip()
   2020       if Search(r'(;|})', start_line):  # Declarations and trivial functions
   2021         body_found = True
   2022         break                              # ... ignore
   2023       elif Search(r'{', start_line):
   2024         body_found = True
   2025         function = Search(r'((\w|:)*)\(', line).group(1)
   2026         if Match(r'TEST', function):    # Handle TEST... macros
   2027           parameter_regexp = Search(r'(\(.*\))', joined_line)
   2028           if parameter_regexp:             # Ignore bad syntax
   2029             function += parameter_regexp.group(1)
   2030         else:
   2031           function += '()'
   2032         function_state.Begin(function)
   2033         break
   2034     if not body_found:
   2035       # No body for the function (or evidence of a non-function) was found.
   2036       error(filename, linenum, 'readability/fn_size', 5,
   2037             'Lint failed to find start of function body.')
   2038   elif Match(r'^\}\s*$', line):  # function end
   2039     function_state.Check(error, filename, linenum)
   2040     function_state.End()
   2041   elif not Match(r'^\s*$', line):
   2042     function_state.Count()  # Count non-blank/non-comment lines.
   2043 
   2044 
   2045 _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
   2046 
   2047 
   2048 def CheckComment(comment, filename, linenum, error):
   2049   """Checks for common mistakes in TODO comments.
   2050 
   2051   Args:
   2052     comment: The text of the comment from the line in question.
   2053     filename: The name of the current file.
   2054     linenum: The number of the line to check.
   2055     error: The function to call with any errors found.
   2056   """
   2057   match = _RE_PATTERN_TODO.match(comment)
   2058   if match:
   2059     # One whitespace is correct; zero whitespace is handled elsewhere.
   2060     leading_whitespace = match.group(1)
   2061     if len(leading_whitespace) > 1:
   2062       error(filename, linenum, 'whitespace/todo', 2,
   2063             'Too many spaces before TODO')
   2064 
   2065     username = match.group(2)
   2066     if not username:
   2067       error(filename, linenum, 'readability/todo', 2,
   2068             'Missing username in TODO; it should look like '
   2069             '"// TODO(my_username): Stuff."')
   2070 
   2071     middle_whitespace = match.group(3)
   2072     # Comparisons made explicit for correctness -- pylint: disable-msg=C6403
   2073     if middle_whitespace != ' ' and middle_whitespace != '':
   2074       error(filename, linenum, 'whitespace/todo', 2,
   2075             'TODO(my_username) should be followed by a space')
   2076 
   2077 def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
   2078   """Checks for improper use of DISALLOW* macros.
   2079 
   2080   Args:
   2081     filename: The name of the current file.
   2082     clean_lines: A CleansedLines instance containing the file.
   2083     linenum: The number of the line to check.
   2084     nesting_state: A _NestingState instance which maintains information about
   2085                    the current stack of nested blocks being parsed.
   2086     error: The function to call with any errors found.
   2087   """
   2088   line = clean_lines.elided[linenum]  # get rid of comments and strings
   2089 
   2090   matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
   2091                    r'DISALLOW_EVIL_CONSTRUCTORS|'
   2092                    r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
   2093   if not matched:
   2094     return
   2095   if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
   2096     if nesting_state.stack[-1].access != 'private':
   2097       error(filename, linenum, 'readability/constructors', 3,
   2098             '%s must be in the private: section' % matched.group(1))
   2099 
   2100   else:
   2101     # Found DISALLOW* macro outside a class declaration, or perhaps it
   2102     # was used inside a function when it should have been part of the
   2103     # class declaration.  We could issue a warning here, but it
   2104     # probably resulted in a compiler error already.
   2105     pass
   2106 
   2107 
   2108 def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
   2109   """Find the corresponding > to close a template.
   2110 
   2111   Args:
   2112     clean_lines: A CleansedLines instance containing the file.
   2113     linenum: Current line number.
   2114     init_suffix: Remainder of the current line after the initial <.
   2115 
   2116   Returns:
   2117     True if a matching bracket exists.
   2118   """
   2119   line = init_suffix
   2120   nesting_stack = ['<']
   2121   while True:
   2122     # Find the next operator that can tell us whether < is used as an
   2123     # opening bracket or as a less-than operator.  We only want to
   2124     # warn on the latter case.
   2125     #
   2126     # We could also check all other operators and terminate the search
   2127     # early, e.g. if we got something like this "a<b+c", the "<" is
   2128     # most likely a less-than operator, but then we will get false
   2129     # positives for default arguments (e.g. http://go/prccd) and
   2130     # other template expressions (e.g. http://go/oxcjq).
   2131     match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
   2132     if match:
   2133       # Found an operator, update nesting stack
   2134       operator = match.group(1)
   2135       line = match.group(2)
   2136 
   2137       if nesting_stack[-1] == '<':
   2138         # Expecting closing angle bracket
   2139         if operator in ('<', '(', '['):
   2140           nesting_stack.append(operator)
   2141         elif operator == '>':
   2142           nesting_stack.pop()
   2143           if not nesting_stack:
   2144             # Found matching angle bracket
   2145             return True
   2146         elif operator == ',':
   2147           # Got a comma after a bracket, this is most likely a template
   2148           # argument.  We have not seen a closing angle bracket yet, but
   2149           # it's probably a few lines later if we look for it, so just
   2150           # return early here.
   2151           return True
   2152         else:
   2153           # Got some other operator.
   2154           return False
   2155 
   2156       else:
   2157         # Expecting closing parenthesis or closing bracket
   2158         if operator in ('<', '(', '['):
   2159           nesting_stack.append(operator)
   2160         elif operator in (')', ']'):
   2161           # We don't bother checking for matching () or [].  If we got
   2162           # something like (] or [), it would have been a syntax error.
   2163           nesting_stack.pop()
   2164 
   2165     else:
   2166       # Scan the next line
   2167       linenum += 1
   2168       if linenum >= len(clean_lines.elided):
   2169         break
   2170       line = clean_lines.elided[linenum]
   2171 
   2172   # Exhausted all remaining lines and still no matching angle bracket.
   2173   # Most likely the input was incomplete, otherwise we should have
   2174   # seen a semicolon and returned early.
   2175   return True
   2176 
   2177 
   2178 def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
   2179   """Find the corresponding < that started a template.
   2180 
   2181   Args:
   2182     clean_lines: A CleansedLines instance containing the file.
   2183     linenum: Current line number.
   2184     init_prefix: Part of the current line before the initial >.
   2185 
   2186   Returns:
   2187     True if a matching bracket exists.
   2188   """
   2189   line = init_prefix
   2190   nesting_stack = ['>']
   2191   while True:
   2192     # Find the previous operator
   2193     match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line)
   2194     if match:
   2195       # Found an operator, update nesting stack
   2196       operator = match.group(2)
   2197       line = match.group(1)
   2198 
   2199       if nesting_stack[-1] == '>':
   2200         # Expecting opening angle bracket
   2201         if operator in ('>', ')', ']'):
   2202           nesting_stack.append(operator)
   2203         elif operator == '<':
   2204           nesting_stack.pop()
   2205           if not nesting_stack:
   2206             # Found matching angle bracket
   2207             return True
   2208         elif operator == ',':
   2209           # Got a comma before a bracket, this is most likely a
   2210           # template argument.  The opening angle bracket is probably
   2211           # there if we look for it, so just return early here.
   2212           return True
   2213         else:
   2214           # Got some other operator.
   2215           return False
   2216 
   2217       else:
   2218         # Expecting opening parenthesis or opening bracket
   2219         if operator in ('>', ')', ']'):
   2220           nesting_stack.append(operator)
   2221         elif operator in ('(', '['):
   2222           nesting_stack.pop()
   2223 
   2224     else:
   2225       # Scan the previous line
   2226       linenum -= 1
   2227       if linenum < 0:
   2228         break
   2229       line = clean_lines.elided[linenum]
   2230 
   2231   # Exhausted all earlier lines and still no matching angle bracket.
   2232   return False
   2233 
   2234 
   2235 def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
   2236   """Checks for the correctness of various spacing issues in the code.
   2237 
   2238   Things we check for: spaces around operators, spaces after
   2239   if/for/while/switch, no spaces around parens in function calls, two
   2240   spaces between code and comment, don't start a block with a blank
   2241   line, don't end a function with a blank line, don't add a blank line
   2242   after public/protected/private, don't have too many blank lines in a row.
   2243 
   2244   Args:
   2245     filename: The name of the current file.
   2246     clean_lines: A CleansedLines instance containing the file.
   2247     linenum: The number of the line to check.
   2248     nesting_state: A _NestingState instance which maintains information about
   2249                    the current stack of nested blocks being parsed.
   2250     error: The function to call with any errors found.
   2251   """
   2252 
   2253   raw = clean_lines.raw_lines
   2254   line = raw[linenum]
   2255 
   2256   # Before nixing comments, check if the line is blank for no good
   2257   # reason.  This includes the first line after a block is opened, and
   2258   # blank lines at the end of a function (ie, right before a line like '}'
   2259   #
   2260   # Skip all the blank line checks if we are immediately inside a
   2261   # namespace body.  In other words, don't issue blank line warnings
   2262   # for this block:
   2263   #   namespace {
   2264   #
   2265   #   }
   2266   #
   2267   # A warning about missing end of namespace comments will be issued instead.
   2268   if IsBlankLine(line) and not nesting_state.InNamespaceBody():
   2269     elided = clean_lines.elided
   2270     prev_line = elided[linenum - 1]
   2271     prevbrace = prev_line.rfind('{')
   2272     # TODO(unknown): Don't complain if line before blank line, and line after,
   2273     #                both start with alnums and are indented the same amount.
   2274     #                This ignores whitespace at the start of a namespace block
   2275     #                because those are not usually indented.
   2276     if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
   2277       # OK, we have a blank line at the start of a code block.  Before we
   2278       # complain, we check if it is an exception to the rule: The previous
   2279       # non-empty line has the parameters of a function header that are indented
   2280       # 4 spaces (because they did not fit in a 80 column line when placed on
   2281       # the same line as the function name).  We also check for the case where
   2282       # the previous line is indented 6 spaces, which may happen when the
   2283       # initializers of a constructor do not fit into a 80 column line.
   2284       exception = False
   2285       if Match(r' {6}\w', prev_line):  # Initializer list?
   2286         # We are looking for the opening column of initializer list, which
   2287         # should be indented 4 spaces to cause 6 space indentation afterwards.
   2288         search_position = linenum-2
   2289         while (search_position >= 0
   2290                and Match(r' {6}\w', elided[search_position])):
   2291           search_position -= 1
   2292         exception = (search_position >= 0
   2293                      and elided[search_position][:5] == '    :')
   2294       else:
   2295         # Search for the function arguments or an initializer list.  We use a
   2296         # simple heuristic here: If the line is indented 4 spaces; and we have a
   2297         # closing paren, without the opening paren, followed by an opening brace
   2298         # or colon (for initializer lists) we assume that it is the last line of
   2299         # a function header.  If we have a colon indented 4 spaces, it is an
   2300         # initializer list.
   2301         exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   2302                            prev_line)
   2303                      or Match(r' {4}:', prev_line))
   2304 
   2305       if not exception:
   2306         error(filename, linenum, 'whitespace/blank_line', 2,
   2307               'Blank line at the start of a code block.  Is this needed?')
   2308     # Ignore blank lines at the end of a block in a long if-else
   2309     # chain, like this:
   2310     #   if (condition1) {
   2311     #     // Something followed by a blank line
   2312     #
   2313     #   } else if (condition2) {
   2314     #     // Something else
   2315     #   }
   2316     if linenum + 1 < clean_lines.NumLines():
   2317       next_line = raw[linenum + 1]
   2318       if (next_line
   2319           and Match(r'\s*}', next_line)
   2320           and next_line.find('} else ') == -1):
   2321         error(filename, linenum, 'whitespace/blank_line', 3,
   2322               'Blank line at the end of a code block.  Is this needed?')
   2323 
   2324     matched = Match(r'\s*(public|protected|private):', prev_line)
   2325     if matched:
   2326       error(filename, linenum, 'whitespace/blank_line', 3,
   2327             'Do not leave a blank line after "%s:"' % matched.group(1))
   2328 
   2329   # Next, we complain if there's a comment too near the text
   2330   commentpos = line.find('//')
   2331   if commentpos != -1:
   2332     # Check if the // may be in quotes.  If so, ignore it
   2333     # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   2334     if (line.count('"', 0, commentpos) -
   2335         line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
   2336       # Allow one space for new scopes, two spaces otherwise:
   2337       if (not Match(r'^\s*{ //', line) and
   2338           ((commentpos >= 1 and
   2339             line[commentpos-1] not in string.whitespace) or
   2340            (commentpos >= 2 and
   2341             line[commentpos-2] not in string.whitespace))):
   2342         error(filename, linenum, 'whitespace/comments', 2,
   2343               'At least two spaces is best between code and comments')
   2344       # There should always be a space between the // and the comment
   2345       commentend = commentpos + 2
   2346       if commentend < len(line) and not line[commentend] == ' ':
   2347         # but some lines are exceptions -- e.g. if they're big
   2348         # comment delimiters like:
   2349         # //----------------------------------------------------------
   2350         # or are an empty C++ style Doxygen comment, like:
   2351         # ///
   2352         # or they begin with multiple slashes followed by a space:
   2353         # //////// Header comment
   2354         match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
   2355                  Search(r'^/$', line[commentend:]) or
   2356                  Search(r'^/+ ', line[commentend:]))
   2357         if not match:
   2358           error(filename, linenum, 'whitespace/comments', 4,
   2359                 'Should have a space between // and comment')
   2360       CheckComment(line[commentpos:], filename, linenum, error)
   2361 
   2362   line = clean_lines.elided[linenum]  # get rid of comments and strings
   2363 
   2364   # Don't try to do spacing checks for operator methods
   2365   line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
   2366 
   2367   # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
   2368   # Otherwise not.  Note we only check for non-spaces on *both* sides;
   2369   # sometimes people put non-spaces on one side when aligning ='s among
   2370   # many lines (not that this is behavior that I approve of...)
   2371   if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
   2372     error(filename, linenum, 'whitespace/operators', 4,
   2373           'Missing spaces around =')
   2374 
   2375   # It's ok not to have spaces around binary operators like + - * /, but if
   2376   # there's too little whitespace, we get concerned.  It's hard to tell,
   2377   # though, so we punt on this one for now.  TODO.
   2378 
   2379   # You should always have whitespace around binary operators.
   2380   #
   2381   # Check <= and >= first to avoid false positives with < and >, then
   2382   # check non-include lines for spacing around < and >.
   2383   match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
   2384   if match:
   2385     error(filename, linenum, 'whitespace/operators', 3,
   2386           'Missing spaces around %s' % match.group(1))
   2387   # We allow no-spaces around << when used like this: 10<<20, but
   2388   # not otherwise (particularly, not when used as streams)
   2389   match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
   2390   if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
   2391     error(filename, linenum, 'whitespace/operators', 3,
   2392           'Missing spaces around <<')
   2393   elif not Match(r'#.*include', line):
   2394     # Avoid false positives on ->
   2395     reduced_line = line.replace('->', '')
   2396 
   2397     # Look for < that is not surrounded by spaces.  This is only
   2398     # triggered if both sides are missing spaces, even though
   2399     # technically should should flag if at least one side is missing a
   2400     # space.  This is done to avoid some false positives with shifts.
   2401     match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
   2402     if (match and
   2403         not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
   2404       error(filename, linenum, 'whitespace/operators', 3,
   2405             'Missing spaces around <')
   2406 
   2407     # Look for > that is not surrounded by spaces.  Similar to the
   2408     # above, we only trigger if both sides are missing spaces to avoid
   2409     # false positives with shifts.
   2410     match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
   2411     if (match and
   2412         not FindPreviousMatchingAngleBracket(clean_lines, linenum,
   2413                                              match.group(1))):
   2414       error(filename, linenum, 'whitespace/operators', 3,
   2415             'Missing spaces around >')
   2416 
   2417   # We allow no-spaces around >> for almost anything.  This is because
   2418   # C++11 allows ">>" to close nested templates, which accounts for
   2419   # most cases when ">>" is not followed by a space.
   2420   #
   2421   # We still warn on ">>" followed by alpha character, because that is
   2422   # likely due to ">>" being used for right shifts, e.g.:
   2423   #   value >> alpha
   2424   #
   2425   # When ">>" is used to close templates, the alphanumeric letter that
   2426   # follows would be part of an identifier, and there should still be
   2427   # a space separating the template type and the identifier.
   2428   #   type<type<type>> alpha
   2429   match = Search(r'>>[a-zA-Z_]', line)
   2430   if match:
   2431     error(filename, linenum, 'whitespace/operators', 3,
   2432           'Missing spaces around >>')
   2433 
   2434   # There shouldn't be space around unary operators
   2435   match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   2436   if match:
   2437     error(filename, linenum, 'whitespace/operators', 4,
   2438           'Extra space for operator %s' % match.group(1))
   2439 
   2440   # A pet peeve of mine: no spaces after an if, while, switch, or for
   2441   match = Search(r' (if\(|for\(|while\(|switch\()', line)
   2442   if match:
   2443     error(filename, linenum, 'whitespace/parens', 5,
   2444           'Missing space before ( in %s' % match.group(1))
   2445 
   2446   # For if/for/while/switch, the left and right parens should be
   2447   # consistent about how many spaces are inside the parens, and
   2448   # there should either be zero or one spaces inside the parens.
   2449   # We don't want: "if ( foo)" or "if ( foo   )".
   2450   # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   2451   match = Search(r'\b(if|for|while|switch)\s*'
   2452                  r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
   2453                  line)
   2454   if match:
   2455     if len(match.group(2)) != len(match.group(4)):
   2456       if not (match.group(3) == ';' and
   2457               len(match.group(2)) == 1 + len(match.group(4)) or
   2458               not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
   2459         error(filename, linenum, 'whitespace/parens', 5,
   2460               'Mismatching spaces inside () in %s' % match.group(1))
   2461     if not len(match.group(2)) in [0, 1]:
   2462       error(filename, linenum, 'whitespace/parens', 5,
   2463             'Should have zero or one spaces inside ( and ) in %s' %
   2464             match.group(1))
   2465 
   2466   # You should always have a space after a comma (either as fn arg or operator)
   2467   if Search(r',[^\s]', line):
   2468     error(filename, linenum, 'whitespace/comma', 3,
   2469           'Missing space after ,')
   2470 
   2471   # You should always have a space after a semicolon
   2472   # except for few corner cases
   2473   # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
   2474   # space after ;
   2475   if Search(r';[^\s};\\)/]', line):
   2476     error(filename, linenum, 'whitespace/semicolon', 3,
   2477           'Missing space after ;')
   2478 
   2479   # Next we will look for issues with function calls.
   2480   CheckSpacingForFunctionCall(filename, line, linenum, error)
   2481 
   2482   # Except after an opening paren, or after another opening brace (in case of
   2483   # an initializer list, for instance), you should have spaces before your
   2484   # braces. And since you should never have braces at the beginning of a line,
   2485   # this is an easy test.
   2486   if Search(r'[^ ({]{', line):
   2487     error(filename, linenum, 'whitespace/braces', 5,
   2488           'Missing space before {')
   2489 
   2490   # Make sure '} else {' has spaces.
   2491   if Search(r'}else', line):
   2492     error(filename, linenum, 'whitespace/braces', 5,
   2493           'Missing space before else')
   2494 
   2495   # You shouldn't have spaces before your brackets, except maybe after
   2496   # 'delete []' or 'new char * []'.
   2497   if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
   2498     error(filename, linenum, 'whitespace/braces', 5,
   2499           'Extra space before [')
   2500 
   2501   # You shouldn't have a space before a semicolon at the end of the line.
   2502   # There's a special case for "for" since the style guide allows space before
   2503   # the semicolon there.
   2504   if Search(r':\s*;\s*$', line):
   2505     error(filename, linenum, 'whitespace/semicolon', 5,
   2506           'Semicolon defining empty statement. Use {} instead.')
   2507   elif Search(r'^\s*;\s*$', line):
   2508     error(filename, linenum, 'whitespace/semicolon', 5,
   2509           'Line contains only semicolon. If this should be an empty statement, '
   2510           'use {} instead.')
   2511   elif (Search(r'\s+;\s*$', line) and
   2512         not Search(r'\bfor\b', line)):
   2513     error(filename, linenum, 'whitespace/semicolon', 5,
   2514           'Extra space before last semicolon. If this should be an empty '
   2515           'statement, use {} instead.')
   2516 
   2517   # In range-based for, we wanted spaces before and after the colon, but
   2518   # not around "::" tokens that might appear.
   2519   if (Search('for *\(.*[^:]:[^: ]', line) or
   2520       Search('for *\(.*[^: ]:[^:]', line)):
   2521     error(filename, linenum, 'whitespace/forcolon', 2,
   2522           'Missing space around colon in range-based for loop')
   2523 
   2524 
   2525 def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
   2526   """Checks for additional blank line issues related to sections.
   2527 
   2528   Currently the only thing checked here is blank line before protected/private.
   2529 
   2530   Args:
   2531     filename: The name of the current file.
   2532     clean_lines: A CleansedLines instance containing the file.
   2533     class_info: A _ClassInfo objects.
   2534     linenum: The number of the line to check.
   2535     error: The function to call with any errors found.
   2536   """
   2537   # Skip checks if the class is small, where small means 25 lines or less.
   2538   # 25 lines seems like a good cutoff since that's the usual height of
   2539   # terminals, and any class that can't fit in one screen can't really
   2540   # be considered "small".
   2541   #
   2542   # Also skip checks if we are on the first line.  This accounts for
   2543   # classes that look like
   2544   #   class Foo { public: ... };
   2545   #
   2546   # If we didn't find the end of the class, last_line would be zero,
   2547   # and the check will be skipped by the first condition.
   2548   if (class_info.last_line - class_info.starting_linenum <= 24 or
   2549       linenum <= class_info.starting_linenum):
   2550     return
   2551 
   2552   matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
   2553   if matched:
   2554     # Issue warning if the line before public/protected/private was
   2555     # not a blank line, but don't do this if the previous line contains
   2556     # "class" or "struct".  This can happen two ways:
   2557     #  - We are at the beginning of the class.
   2558     #  - We are forward-declaring an inner class that is semantically
   2559     #    private, but needed to be public for implementation reasons.
   2560     # Also ignores cases where the previous line ends with a backslash as can be
   2561     # common when defining classes in C macros.
   2562     prev_line = clean_lines.lines[linenum - 1]
   2563     if (not IsBlankLine(prev_line) and
   2564         not Search(r'\b(class|struct)\b', prev_line) and
   2565         not Search(r'\\$', prev_line)):
   2566       # Try a bit harder to find the beginning of the class.  This is to
   2567       # account for multi-line base-specifier lists, e.g.:
   2568       #   class Derived
   2569       #       : public Base {
   2570       end_class_head = class_info.starting_linenum
   2571       for i in range(class_info.starting_linenum, linenum):
   2572         if Search(r'\{\s*$', clean_lines.lines[i]):
   2573           end_class_head = i
   2574           break
   2575       if end_class_head < linenum - 1:
   2576         error(filename, linenum, 'whitespace/blank_line', 3,
   2577               '"%s:" should be preceded by a blank line' % matched.group(1))
   2578 
   2579 
   2580 def GetPreviousNonBlankLine(clean_lines, linenum):
   2581   """Return the most recent non-blank line and its line number.
   2582 
   2583   Args:
   2584     clean_lines: A CleansedLines instance containing the file contents.
   2585     linenum: The number of the line to check.
   2586 
   2587   Returns:
   2588     A tuple with two elements.  The first element is the contents of the last
   2589     non-blank line before the current line, or the empty string if this is the
   2590     first non-blank line.  The second is the line number of that line, or -1
   2591     if this is the first non-blank line.
   2592   """
   2593 
   2594   prevlinenum = linenum - 1
   2595   while prevlinenum >= 0:
   2596     prevline = clean_lines.elided[prevlinenum]
   2597     if not IsBlankLine(prevline):     # if not a blank line...
   2598       return (prevline, prevlinenum)
   2599     prevlinenum -= 1
   2600   return ('', -1)
   2601 
   2602 
   2603 def CheckBraces(filename, clean_lines, linenum, error):
   2604   """Looks for misplaced braces (e.g. at the end of line).
   2605 
   2606   Args:
   2607     filename: The name of the current file.
   2608     clean_lines: A CleansedLines instance containing the file.
   2609     linenum: The number of the line to check.
   2610     error: The function to call with any errors found.
   2611   """
   2612 
   2613   line = clean_lines.elided[linenum]        # get rid of comments and strings
   2614 
   2615   if Match(r'\s*{\s*$', line):
   2616     # We allow an open brace to start a line in the case where someone
   2617     # is using braces in a block to explicitly create a new scope,
   2618     # which is commonly used to control the lifetime of
   2619     # stack-allocated variables.  We don't detect this perfectly: we
   2620     # just don't complain if the last non-whitespace character on the
   2621     # previous non-blank line is ';', ':', '{', or '}', or if the previous
   2622     # line starts a preprocessor block.
   2623     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   2624     if (not Search(r'[;:}{]\s*$', prevline) and
   2625         not Match(r'\s*#', prevline)):
   2626       error(filename, linenum, 'whitespace/braces', 4,
   2627             '{ should almost always be at the end of the previous line')
   2628 
   2629   # An else clause should be on the same line as the preceding closing brace.
   2630   if Match(r'\s*else\s*', line):
   2631     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   2632     if Match(r'\s*}\s*$', prevline):
   2633       error(filename, linenum, 'whitespace/newline', 4,
   2634             'An else should appear on the same line as the preceding }')
   2635 
   2636   # If braces come on one side of an else, they should be on both.
   2637   # However, we have to worry about "else if" that spans multiple lines!
   2638   if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
   2639     if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
   2640       # find the ( after the if
   2641       pos = line.find('else if')
   2642       pos = line.find('(', pos)
   2643       if pos > 0:
   2644         (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
   2645         if endline[endpos:].find('{') == -1:    # must be brace after if
   2646           error(filename, linenum, 'readability/braces', 5,
   2647                 'If an else has a brace on one side, it should have it on both')
   2648     else:            # common case: else not followed by a multi-line if
   2649       error(filename, linenum, 'readability/braces', 5,
   2650             'If an else has a brace on one side, it should have it on both')
   2651 
   2652   # Likewise, an else should never have the else clause on the same line
   2653   if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
   2654     error(filename, linenum, 'whitespace/newline', 4,
   2655           'Else clause should never be on same line as else (use 2 lines)')
   2656 
   2657   # In the same way, a do/while should never be on one line
   2658   if Match(r'\s*do [^\s{]', line):
   2659     error(filename, linenum, 'whitespace/newline', 4,
   2660           'do/while clauses should not be on a single line')
   2661 
   2662   # Braces shouldn't be followed by a ; unless they're defining a struct
   2663   # or initializing an array.
   2664   # We can't tell in general, but we can for some common cases.
   2665   prevlinenum = linenum
   2666   while True:
   2667     (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
   2668     if Match(r'\s+{.*}\s*;', line) and not prevline.count(';'):
   2669       line = prevline + line
   2670     else:
   2671       break
   2672   if (Search(r'{.*}\s*;', line) and
   2673       line.count('{') == line.count('}') and
   2674       not Search(r'struct|class|enum|\s*=\s*{', line)):
   2675     error(filename, linenum, 'readability/braces', 4,
   2676           "You don't need a ; after a }")
   2677 
   2678 
   2679 def CheckEmptyLoopBody(filename, clean_lines, linenum, error):
   2680   """Loop for empty loop body with only a single semicolon.
   2681 
   2682   Args:
   2683     filename: The name of the current file.
   2684     clean_lines: A CleansedLines instance containing the file.
   2685     linenum: The number of the line to check.
   2686     error: The function to call with any errors found.
   2687   """
   2688 
   2689   # Search for loop keywords at the beginning of the line.  Because only
   2690   # whitespaces are allowed before the keywords, this will also ignore most
   2691   # do-while-loops, since those lines should start with closing brace.
   2692   line = clean_lines.elided[linenum]
   2693   if Match(r'\s*(for|while)\s*\(', line):
   2694     # Find the end of the conditional expression
   2695     (end_line, end_linenum, end_pos) = CloseExpression(
   2696         clean_lines, linenum, line.find('('))
   2697 
   2698     # Output warning if what follows the condition expression is a semicolon.
   2699     # No warning for all other cases, including whitespace or newline, since we
   2700     # have a separate check for semicolons preceded by whitespace.
   2701     if end_pos >= 0 and Match(r';', end_line[end_pos:]):
   2702       error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
   2703             'Empty loop bodies should use {} or continue')
   2704 
   2705 
   2706 def ReplaceableCheck(operator, macro, line):
   2707   """Determine whether a basic CHECK can be replaced with a more specific one.
   2708 
   2709   For example suggest using CHECK_EQ instead of CHECK(a == b) and
   2710   similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   2711 
   2712   Args:
   2713     operator: The C++ operator used in the CHECK.
   2714     macro: The CHECK or EXPECT macro being called.
   2715     line: The current source line.
   2716 
   2717   Returns:
   2718     True if the CHECK can be replaced with a more specific one.
   2719   """
   2720 
   2721   # This matches decimal and hex integers, strings, and chars (in that order).
   2722   match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   2723 
   2724   # Expression to match two sides of the operator with something that
   2725   # looks like a literal, since CHECK(x == iterator) won't compile.
   2726   # This means we can't catch all the cases where a more specific
   2727   # CHECK is possible, but it's less annoying than dealing with
   2728   # extraneous warnings.
   2729   match_this = (r'\s*' + macro + r'\((\s*' +
   2730                 match_constant + r'\s*' + operator + r'[^<>].*|'
   2731                 r'.*[^<>]' + operator + r'\s*' + match_constant +
   2732                 r'\s*\))')
   2733 
   2734   # Don't complain about CHECK(x == NULL) or similar because
   2735   # CHECK_EQ(x, NULL) won't compile (requires a cast).
   2736   # Also, don't complain about more complex boolean expressions
   2737   # involving && or || such as CHECK(a == b || c == d).
   2738   return Match(match_this, line) and not Search(r'NULL|&&|\|\|', line)
   2739 
   2740 
   2741 def CheckCheck(filename, clean_lines, linenum, error):
   2742   """Checks the use of CHECK and EXPECT macros.
   2743 
   2744   Args:
   2745     filename: The name of the current file.
   2746     clean_lines: A CleansedLines instance containing the file.
   2747     linenum: The number of the line to check.
   2748     error: The function to call with any errors found.
   2749   """
   2750 
   2751   # Decide the set of replacement macros that should be suggested
   2752   raw_lines = clean_lines.raw_lines
   2753   current_macro = ''
   2754   for macro in _CHECK_MACROS:
   2755     if raw_lines[linenum].find(macro) >= 0:
   2756       current_macro = macro
   2757       break
   2758   if not current_macro:
   2759     # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   2760     return
   2761 
   2762   line = clean_lines.elided[linenum]        # get rid of comments and strings
   2763 
   2764   # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   2765   for operator in ['==', '!=', '>=', '>', '<=', '<']:
   2766     if ReplaceableCheck(operator, current_macro, line):
   2767       error(filename, linenum, 'readability/check', 2,
   2768             'Consider using %s instead of %s(a %s b)' % (
   2769                 _CHECK_REPLACEMENT[current_macro][operator],
   2770                 current_macro, operator))
   2771       break
   2772 
   2773 
   2774 def CheckAltTokens(filename, clean_lines, linenum, error):
   2775   """Check alternative keywords being used in boolean expressions.
   2776 
   2777   Args:
   2778     filename: The name of the current file.
   2779     clean_lines: A CleansedLines instance containing the file.
   2780     linenum: The number of the line to check.
   2781     error: The function to call with any errors found.
   2782   """
   2783   line = clean_lines.elided[linenum]
   2784 
   2785   # Avoid preprocessor lines
   2786   if Match(r'^\s*#', line):
   2787     return
   2788 
   2789   # Last ditch effort to avoid multi-line comments.  This will not help
   2790   # if the comment started before the current line or ended after the
   2791   # current line, but it catches most of the false positives.  At least,
   2792   # it provides a way to workaround this warning for people who use
   2793   # multi-line comments in preprocessor macros.
   2794   #
   2795   # TODO(unknown): remove this once cpplint has better support for
   2796   # multi-line comments.
   2797   if line.find('/*') >= 0 or line.find('*/') >= 0:
   2798     return
   2799 
   2800   for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
   2801     error(filename, linenum, 'readability/alt_tokens', 2,
   2802           'Use operator %s instead of %s' % (
   2803               _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
   2804 
   2805 
   2806 def GetLineWidth(line):
   2807   """Determines the width of the line in column positions.
   2808 
   2809   Args:
   2810     line: A string, which may be a Unicode string.
   2811 
   2812   Returns:
   2813     The width of the line in column positions, accounting for Unicode
   2814     combining characters and wide characters.
   2815   """
   2816   if isinstance(line, unicode):
   2817     width = 0
   2818     for uc in unicodedata.normalize('NFC', line):
   2819       if unicodedata.east_asian_width(uc) in ('W', 'F'):
   2820         width += 2
   2821       elif not unicodedata.combining(uc):
   2822         width += 1
   2823     return width
   2824   else:
   2825     return len(line)
   2826 
   2827 
   2828 def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
   2829                error):
   2830   """Checks rules from the 'C++ style rules' section of cppguide.html.
   2831 
   2832   Most of these rules are hard to test (naming, comment style), but we
   2833   do what we can.  In particular we check for 2-space indents, line lengths,
   2834   tab usage, spaces inside code, etc.
   2835 
   2836   Args:
   2837     filename: The name of the current file.
   2838     clean_lines: A CleansedLines instance containing the file.
   2839     linenum: The number of the line to check.
   2840     file_extension: The extension (without the dot) of the filename.
   2841     nesting_state: A _NestingState instance which maintains information about
   2842                    the current stack of nested blocks being parsed.
   2843     error: The function to call with any errors found.
   2844   """
   2845 
   2846   raw_lines = clean_lines.raw_lines
   2847   line = raw_lines[linenum]
   2848 
   2849   if line.find('\t') != -1:
   2850     error(filename, linenum, 'whitespace/tab', 1,
   2851           'Tab found; better to use spaces')
   2852 
   2853   # One or three blank spaces at the beginning of the line is weird; it's
   2854   # hard to reconcile that with 2-space indents.
   2855   # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
   2856   # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
   2857   # if(RLENGTH > 20) complain = 0;
   2858   # if(match($0, " +(error|private|public|protected):")) complain = 0;
   2859   # if(match(prev, "&& *$")) complain = 0;
   2860   # if(match(prev, "\\|\\| *$")) complain = 0;
   2861   # if(match(prev, "[\",=><] *$")) complain = 0;
   2862   # if(match($0, " <<")) complain = 0;
   2863   # if(match(prev, " +for \\(")) complain = 0;
   2864   # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
   2865   initial_spaces = 0
   2866   cleansed_line = clean_lines.elided[linenum]
   2867   while initial_spaces < len(line) and line[initial_spaces] == ' ':
   2868     initial_spaces += 1
   2869   if line and line[-1].isspace():
   2870     error(filename, linenum, 'whitespace/end_of_line', 4,
   2871           'Line ends in whitespace.  Consider deleting these extra spaces.')
   2872   # There are certain situations we allow one space, notably for labels
   2873   elif ((initial_spaces == 1 or initial_spaces == 3) and
   2874         not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
   2875     error(filename, linenum, 'whitespace/indent', 3,
   2876           'Weird number of spaces at line-start.  '
   2877           'Are you using a 2-space indent?')
   2878   # Labels should always be indented at least one space.
   2879   elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
   2880                                                           line):
   2881     error(filename, linenum, 'whitespace/labels', 4,
   2882           'Labels should always be indented at least one space.  '
   2883           'If this is a member-initializer list in a constructor or '
   2884           'the base class list in a class definition, the colon should '
   2885           'be on the following line.')
   2886 
   2887 
   2888   # Check if the line is a header guard.
   2889   is_header_guard = False
   2890   if file_extension == 'h':
   2891     cppvar = GetHeaderGuardCPPVariable(filename)
   2892     if (line.startswith('#ifndef %s' % cppvar) or
   2893         line.startswith('#define %s' % cppvar) or
   2894         line.startswith('#endif  // %s' % cppvar)):
   2895       is_header_guard = True
   2896   # #include lines and header guards can be long, since there's no clean way to
   2897   # split them.
   2898   #
   2899   # URLs can be long too.  It's possible to split these, but it makes them
   2900   # harder to cut&paste.
   2901   #
   2902   # The "$Id:...$" comment may also get very long without it being the
   2903   # developers fault.
   2904   if (not line.startswith('#include') and not is_header_guard and
   2905       not Match(r'^\s*//.*http(s?)://\S*$', line) and
   2906       not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
   2907     line_width = GetLineWidth(line)
   2908     if line_width > 100:
   2909       error(filename, linenum, 'whitespace/line_length', 4,
   2910             'Lines should very rarely be longer than 100 characters')
   2911     elif line_width > 80:
   2912       error(filename, linenum, 'whitespace/line_length', 2,
   2913             'Lines should be <= 80 characters long')
   2914 
   2915   if (cleansed_line.count(';') > 1 and
   2916       # for loops are allowed two ;'s (and may run over two lines).
   2917       cleansed_line.find('for') == -1 and
   2918       (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
   2919        GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
   2920       # It's ok to have many commands in a switch case that fits in 1 line
   2921       not ((cleansed_line.find('case ') != -1 or
   2922             cleansed_line.find('default:') != -1) and
   2923            cleansed_line.find('break;') != -1)):
   2924     error(filename, linenum, 'whitespace/newline', 0,
   2925           'More than one command on the same line')
   2926 
   2927   # Some more style checks
   2928   CheckBraces(filename, clean_lines, linenum, error)
   2929   CheckEmptyLoopBody(filename, clean_lines, linenum, error)
   2930   CheckAccess(filename, clean_lines, linenum, nesting_state, error)
   2931   CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
   2932   CheckCheck(filename, clean_lines, linenum, error)
   2933   CheckAltTokens(filename, clean_lines, linenum, error)
   2934   classinfo = nesting_state.InnermostClass()
   2935   if classinfo:
   2936     CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
   2937 
   2938 
# Matches a quoted #include of a .h file whose name contains no '/', i.e. one
# that does not spell out a directory.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  Group 1 is the opening delimiter (< or ")
# and group 2 is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2947 
   2948 
   2949 def _DropCommonSuffixes(filename):
   2950   """Drops common suffixes like _test.cc or -inl.h from filename.
   2951 
   2952   For example:
   2953     >>> _DropCommonSuffixes('foo/foo-inl.h')
   2954     'foo/foo'
   2955     >>> _DropCommonSuffixes('foo/bar/foo.cc')
   2956     'foo/bar/foo'
   2957     >>> _DropCommonSuffixes('foo/foo_internal.h')
   2958     'foo/foo'
   2959     >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
   2960     'foo/foo_unusualinternal'
   2961 
   2962   Args:
   2963     filename: The input filename.
   2964 
   2965   Returns:
   2966     The filename with the common suffix removed.
   2967   """
   2968   for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
   2969                  'inl.h', 'impl.h', 'internal.h'):
   2970     if (filename.endswith(suffix) and len(filename) > len(suffix) and
   2971         filename[-len(suffix) - 1] in ('-', '_')):
   2972       return filename[:-len(suffix) - 1]
   2973   return os.path.splitext(filename)[0]
   2974 
   2975 
   2976 def _IsTestFilename(filename):
   2977   """Determines if the given filename has a suffix that identifies it as a test.
   2978 
   2979   Args:
   2980     filename: The input filename.
   2981 
   2982   Returns:
   2983     True if 'filename' looks like a test, False otherwise.
   2984   """
   2985   if (filename.endswith('_test.cc') or
   2986       filename.endswith('_unittest.cc') or
   2987       filename.endswith('_regtest.cc')):
   2988     return True
   2989   else:
   2990     return False
   2991 
   2992 
   2993 def _ClassifyInclude(fileinfo, include, is_system):
   2994   """Figures out what kind of header 'include' is.
   2995 
   2996   Args:
   2997     fileinfo: The current file cpplint is running over. A FileInfo instance.
   2998     include: The path to a #included file.
   2999     is_system: True if the #include used <> rather than "".
   3000 
   3001   Returns:
   3002     One of the _XXX_HEADER constants.
   3003 
   3004   For example:
   3005     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
   3006     _C_SYS_HEADER
   3007     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
   3008     _CPP_SYS_HEADER
   3009     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
   3010     _LIKELY_MY_HEADER
   3011     >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
   3012     ...                  'bar/foo_other_ext.h', False)
   3013     _POSSIBLE_MY_HEADER
   3014     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
   3015     _OTHER_HEADER
   3016   """
   3017   # This is a list of all standard c++ header files, except
   3018   # those already checked for above.
   3019   is_stl_h = include in _STL_HEADERS
   3020   is_cpp_h = is_stl_h or include in _CPP_HEADERS
   3021 
   3022   if is_system:
   3023     if is_cpp_h:
   3024       return _CPP_SYS_HEADER
   3025     else:
   3026       return _C_SYS_HEADER
   3027 
   3028   # If the target file and the include we're checking share a
   3029   # basename when we drop common extensions, and the include
   3030   # lives in . , then it's likely to be owned by the target file.
   3031   target_dir, target_base = (
   3032       os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
   3033   include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
   3034   if target_base == include_base and (
   3035       include_dir == target_dir or
   3036       include_dir == os.path.normpath(target_dir + '/../public')):
   3037     return _LIKELY_MY_HEADER
   3038 
   3039   # If the target and include share some initial basename
   3040   # component, it's possible the target is implementing the
   3041   # include, so it's allowed to be first, but we'll never
   3042   # complain if it's not there.
   3043   target_first_component = _RE_FIRST_COMPONENT.match(target_base)
   3044   include_first_component = _RE_FIRST_COMPONENT.match(include_base)
   3045   if (target_first_component and include_first_component and
   3046       target_first_component.group(0) ==
   3047       include_first_component.group(0)):
   3048     return _POSSIBLE_MY_HEADER
   3049 
   3050   return _OTHER_HEADER
   3051 
   3052 
   3053 
   3054 def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
   3055   """Check rules that are applicable to #include lines.
   3056 
   3057   Strings on #include lines are NOT removed from elided line, to make
   3058   certain tasks easier. However, to prevent false positives, checks
   3059   applicable to #include lines in CheckLanguage must be put here.
   3060 
   3061   Args:
   3062     filename: The name of the current file.
   3063     clean_lines: A CleansedLines instance containing the file.
   3064     linenum: The number of the line to check.
   3065     include_state: An _IncludeState instance in which the headers are inserted.
   3066     error: The function to call with any errors found.
   3067   """
   3068   fileinfo = FileInfo(filename)
   3069 
   3070   line = clean_lines.lines[linenum]
   3071 
   3072   # "include" should use the new style "foo/bar.h" instead of just "bar.h"
   3073   if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
   3074     error(filename, linenum, 'build/include', 4,
   3075           'Include the directory when naming .h files')
   3076 
   3077   # we shouldn't include a file more than once. actually, there are a
   3078   # handful of instances where doing so is okay, but in general it's
   3079   # not.
   3080   match = _RE_PATTERN_INCLUDE.search(line)
   3081   if match:
   3082     include = match.group(2)
   3083     is_system = (match.group(1) == '<')
   3084     if include in include_state:
   3085       error(filename, linenum, 'build/include', 4,
   3086             '"%s" already included at %s:%s' %
   3087             (include, filename, include_state[include]))
   3088     else:
   3089       include_state[include] = linenum
   3090 
   3091       # We want to ensure that headers appear in the right order:
   3092       # 1) for foo.cc, foo.h  (preferred location)
   3093       # 2) c system files
   3094       # 3) cpp system files
   3095       # 4) for foo.cc, foo.h  (deprecated location)
   3096       # 5) other google headers
   3097       #
   3098       # We classify each include statement as one of those 5 types
   3099       # using a number of techniques. The include_state object keeps
   3100       # track of the highest type seen, and complains if we see a
   3101       # lower type after that.
   3102       error_message = include_state.CheckNextIncludeOrder(
   3103           _ClassifyInclude(fileinfo, include, is_system))
   3104       if error_message:
   3105         error(filename, linenum, 'build/include_order', 4,
   3106               '%s. Should be: %s.h, c system, c++ system, other.' %
   3107               (error_message, fileinfo.BaseName()))
   3108       if not include_state.IsInAlphabeticalOrder(include):
   3109         error(filename, linenum, 'build/include_alpha', 4,
   3110               'Include "%s" not in alphabetical order' % include)
   3111 
   3112   # Look for any of the stream classes that are part of standard C++.
   3113   match = _RE_PATTERN_INCLUDE.match(line)
   3114   if match:
   3115     include = match.group(2)
   3116     if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   3117       # Many unit tests use cout, so we exempt them.
   3118       if not _IsTestFilename(filename):
   3119         error(filename, linenum, 'readability/streams', 3,
   3120               'Streams are highly discouraged.')
   3121 
   3122 
   3123 def _GetTextInside(text, start_pattern):
   3124   """Retrieves all the text between matching open and close parentheses.
   3125 
   3126   Given a string of lines and a regular expression string, retrieve all the text
   3127   following the expression and between opening punctuation symbols like
   3128   (, [, or {, and the matching close-punctuation symbol. This properly nested
   3129   occurrences of the punctuations, so for the text like
   3130     printf(a(), b(c()));
   3131   a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
   3132   start_pattern must match string having an open punctuation symbol at the end.
   3133 
   3134   Args:
   3135     text: The lines to extract text. Its comments and strings must be elided.
   3136            It can be single line and can span multiple lines.
   3137     start_pattern: The regexp string indicating where to start extracting
   3138                    the text.
   3139   Returns:
   3140     The extracted text.
   3141     None if either the opening string or ending punctuation could not be found.
   3142   """
   3143   # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
   3144   # rewritten to use _GetTextInside (and use inferior regexp matching today).
   3145 
   3146   # Give opening punctuations to get the matching close-punctuations.
   3147   matching_punctuation = {'(': ')', '{': '}', '[': ']'}
   3148   closing_punctuation = set(matching_punctuation.itervalues())
   3149 
   3150   # Find the position to start extracting text.
   3151   match = re.search(start_pattern, text, re.M)
   3152   if not match:  # start_pattern not found in text.
   3153     return None
   3154   start_position = match.end(0)
   3155 
   3156   assert start_position > 0, (
   3157       'start_pattern must ends with an opening punctuation.')
   3158   assert text[start_position - 1] in matching_punctuation, (
   3159       'start_pattern must ends with an opening punctuation.')
   3160   # Stack of closing punctuations we expect to have in text after position.
   3161   punctuation_stack = [matching_punctuation[text[start_position - 1]]]
   3162   position = start_position
   3163   while punctuation_stack and position < len(text):
   3164     if text[position] == punctuation_stack[-1]:
   3165       punctuation_stack.pop()
   3166     elif text[position] in closing_punctuation:
   3167       # A closing punctuation without matching opening punctuations.
   3168       return None
   3169     elif text[position] in matching_punctuation:
   3170       punctuation_stack.append(matching_punctuation[text[position]])
   3171     position += 1
   3172   if punctuation_stack:
   3173     # Opening punctuations left without matching close-punctuations.
   3174     return None
   3175   # punctuations match.
   3176   return text[start_position:position - 1]
   3177 
   3178 
   3179 def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
   3180                   error):
   3181   """Checks rules from the 'C++ language rules' section of cppguide.html.
   3182 
   3183   Some of these rules are hard to test (function overloading, using
   3184   uint32 inappropriately), but we do the best we can.
   3185 
   3186   Args:
   3187     filename: The name of the current file.
   3188     clean_lines: A CleansedLines instance containing the file.
   3189     linenum: The number of the line to check.
   3190     file_extension: The extension (without the dot) of the filename.
   3191     include_state: An _IncludeState instance in which the headers are inserted.
   3192     error: The function to call with any errors found.
   3193   """
   3194   # If the line is empty or consists of entirely a comment, no need to
   3195   # check it.
   3196   line = clean_lines.elided[linenum]
   3197   if not line:
   3198     return
   3199 
   3200   match = _RE_PATTERN_INCLUDE.search(line)
   3201   if match:
   3202     CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
   3203     return
   3204 
   3205   # Create an extended_line, which is the concatenation of the current and
   3206   # next lines, for more effective checking of code that may span more than one
   3207   # line.
   3208   if linenum + 1 < clean_lines.NumLines():
   3209     extended_line = line + clean_lines.elided[linenum + 1]
   3210   else:
   3211     extended_line = line
   3212 
   3213   # Make Windows paths like Unix.
   3214   fullname = os.path.abspath(filename).replace('\\', '/')
   3215 
   3216   # TODO(unknown): figure out if they're using default arguments in fn proto.
   3217 
   3218   # Check for non-const references in functions.  This is tricky because &
   3219   # is also used to take the address of something.  We allow <> for templates,
   3220   # (ignoring whatever is between the braces) and : for classes.
   3221   # These are complicated re's.  They try to capture the following:
   3222   # paren (for fn-prototype start), typename, &, varname.  For the const
   3223   # version, we're willing for const to be before typename or after
   3224   # Don't check the implementation on same line.
   3225   fnline = line.split('{', 1)[0]
   3226   if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
   3227       len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
   3228                      r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
   3229       len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
   3230                      fnline))):
   3231 
   3232     # We allow non-const references in a few standard places, like functions
   3233     # called "swap()" or iostream operators like "<<" or ">>". We also filter
   3234     # out for loops, which lint otherwise mistakenly thinks are functions.
   3235     if not Search(
   3236         r'(for|swap|Swap|operator[<>][<>])\s*\(\s*'
   3237         r'(?:(?:typename\s*)?[\w:]|<.*>)+\s*&',
   3238         fnline):
   3239       error(filename, linenum, 'runtime/references', 2,
   3240             'Is this a non-const reference? '
   3241             'If so, make const or use a pointer.')
   3242 
   3243   # Check to see if they're using an conversion function cast.
   3244   # I just try to capture the most common basic types, though there are more.
   3245   # Parameterless conversion functions, such as bool(), are allowed as they are
   3246   # probably a member operator declaration or default constructor.
   3247   match = Search(
   3248       r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
   3249       r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   3250   if match:
   3251     # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   3252     # where type may be float(), int(string), etc.  Without context they are
   3253     # virtually indistinguishable from int(x) casts. Likewise, gMock's
   3254     # MockCallback takes a template parameter of the form return_type(arg_type),
   3255     # which looks much like the cast we're trying to detect.
   3256     # BEGIN android-added
   3257     # The C++ 2011 std::function class template exhibits a similar issue.
   3258     # END android-added
   3259     if (match.group(1) is None and  # If new operator, then this isn't a cast
   3260         not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
   3261              # BEGIN android-changed
   3262              # Match(r'^\s*MockCallback<.*>', line))):
   3263              Match(r'^\s*MockCallback<.*>', line) or
   3264              Match(r'^\s*std::function<.*>', line))):
   3265              # END android-changed
   3266       # Try a bit harder to catch gmock lines: the only place where
   3267       # something looks like an old-style cast is where we declare the
   3268       # return type of the mocked method, and the only time when we
   3269       # are missing context is if MOCK_METHOD was split across
   3270       # multiple lines (for example http://go/hrfhr ), so we only need
   3271       # to check the previous line for MOCK_METHOD.
   3272       if (linenum == 0 or
   3273           not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(\S+,\s*$',
   3274                     clean_lines.elided[linenum - 1])):
   3275         error(filename, linenum, 'readability/casting', 4,
   3276               'Using deprecated casting style.  '
   3277               'Use static_cast<%s>(...) instead' %
   3278               match.group(2))
   3279 
   3280   CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3281                   'static_cast',
   3282                   r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
   3283 
   3284   # This doesn't catch all cases. Consider (const char * const)"hello".
   3285   #
   3286   # (char *) "foo" should always be a const_cast (reinterpret_cast won't
   3287   # compile).
   3288   if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3289                      'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
   3290     pass
   3291   else:
   3292     # Check pointer casts for other than string constants
   3293     CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3294                     'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   3295 
   3296   # In addition, we look for people taking the address of a cast.  This
   3297   # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   3298   # point where you think.
   3299   if Search(
   3300       r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   3301     error(filename, linenum, 'runtime/casting', 4,
   3302           ('Are you taking an address of a cast?  '
   3303            'This is dangerous: could be a temp var.  '
   3304            'Take the address before doing the cast, rather than after'))
   3305 
   3306   # Check for people declaring static/global STL strings at the top level.
   3307   # This is dangerous because the C++ language does not guarantee that
   3308   # globals with constructors are initialized before the first access.
   3309   match = Match(
   3310       r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   3311       line)
   3312   # Make sure it's not a function.
   3313   # Function template specialization looks like: "string foo<Type>(...".
   3314   # Class template definitions look like: "string Foo<Type>::Method(...".
   3315   if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   3316                          match.group(3)):
   3317     error(filename, linenum, 'runtime/string', 4,
   3318           'For a static/global string constant, use a C style string instead: '
   3319           '"%schar %s[]".' %
   3320           (match.group(1), match.group(2)))
   3321 
   3322   # Check that we're not using RTTI outside of testing code.
   3323   if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
   3324     error(filename, linenum, 'runtime/rtti', 5,
   3325           'Do not use dynamic_cast<>.  If you need to cast within a class '
   3326           "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   3327           'RTTI.')
   3328 
   3329   if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   3330     error(filename, linenum, 'runtime/init', 4,
   3331           'You seem to be initializing a member variable with itself.')
   3332 
   3333   if file_extension == 'h':
   3334     # TODO(unknown): check that 1-arg constructors are explicit.
   3335     #                How to tell it's a constructor?
   3336     #                (handled in CheckForNonStandardConstructs for now)
   3337     # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
   3338     #                (level 1 error)
   3339     pass
   3340 
   3341   # Check if people are using the verboten C basic types.  The only exception
   3342   # we regularly allow is "unsigned short port" for port.
   3343   if Search(r'\bshort port\b', line):
   3344     if not Search(r'\bunsigned short port\b', line):
   3345       error(filename, linenum, 'runtime/int', 4,
   3346             'Use "unsigned short" for ports, not "short"')
   3347   else:
   3348     match = Search(r'\b(short|long(?! +double)|long long)\b', line)
   3349     if match:
   3350       error(filename, linenum, 'runtime/int', 4,
   3351             'Use int16/int64/etc, rather than the C type %s' % match.group(1))
   3352 
   3353   # When snprintf is used, the second argument shouldn't be a literal.
   3354   match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   3355   if match and match.group(2) != '0':
   3356     # If 2nd arg is zero, snprintf is used to calculate size.
   3357     error(filename, linenum, 'runtime/printf', 3,
   3358           'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   3359           'to snprintf.' % (match.group(1), match.group(2)))
   3360 
   3361   # Check if some verboten C functions are being used.
   3362   if Search(r'\bsprintf\b', line):
   3363     error(filename, linenum, 'runtime/printf', 5,
   3364           'Never use sprintf.  Use snprintf instead.')
   3365   match = Search(r'\b(strcpy|strcat)\b', line)
   3366   if match:
   3367     error(filename, linenum, 'runtime/printf', 4,
   3368           'Almost always, snprintf is better than %s' % match.group(1))
   3369 
   3370   if Search(r'\bsscanf\b', line):
   3371     error(filename, linenum, 'runtime/printf', 1,
   3372           'sscanf can be ok, but is slow and can overflow buffers.')
   3373 
   3374   # Check if some verboten operator overloading is going on
   3375   # TODO(unknown): catch out-of-line unary operator&:
   3376   #   class X {};
   3377   #   int operator&(const X& x) { return 42; }  // unary operator&
   3378   # The trick is it's hard to tell apart from binary operator&:
   3379   #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
   3380   if Search(r'\boperator\s*&\s*\(\s*\)', line):
   3381     error(filename, linenum, 'runtime/operator', 4,
   3382           'Unary operator& is dangerous.  Do not use it.')
   3383 
   3384   # Check for suspicious usage of "if" like
   3385   # } if (a == b) {
   3386   if Search(r'\}\s*if\s*\(', line):
   3387     error(filename, linenum, 'readability/braces', 4,
   3388           'Did you mean "else if"? If not, start a new line for "if".')
   3389 
   3390   # Check for potential format string bugs like printf(foo).
   3391   # We constrain the pattern not to pick things like DocidForPrintf(foo).
   3392   # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   3393   # TODO(sugawarayu): Catch the following case. Need to change the calling
   3394   # convention of the whole function to process multiple line to handle it.
   3395   #   printf(
   3396   #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
   3397   printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
   3398   if printf_args:
   3399     match = Match(r'([\w.\->()]+)$', printf_args)
   3400     if match and match.group(1) != '__VA_ARGS__':
   3401       function_name = re.search(r'\b((?:string)?printf)\s*\(',
   3402                                 line, re.I).group(1)
   3403       error(filename, linenum, 'runtime/printf', 4,
   3404             'Potential format string bug. Do %s("%%s", %s) instead.'
   3405             % (function_name, match.group(1)))
   3406 
   3407   # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   3408   match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   3409   if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
   3410     error(filename, linenum, 'runtime/memset', 4,
   3411           'Did you mean "memset(%s, 0, %s)"?'
   3412           % (match.group(1), match.group(2)))
   3413 
   3414   if Search(r'\busing namespace\b', line):
   3415     error(filename, linenum, 'build/namespaces', 5,
   3416           'Do not use namespace using-directives.  '
   3417           'Use using-declarations instead.')
   3418 
   3419   # Detect variable-length arrays.
   3420   match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   3421   if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
   3422       match.group(3).find(']') == -1):
   3423     # Split the size using space and arithmetic operators as delimiters.
   3424     # If any of the resulting tokens are not compile time constants then
   3425     # report the error.
   3426     tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
   3427     is_const = True
   3428     skip_next = False
   3429     for tok in tokens:
   3430       if skip_next:
   3431         skip_next = False
   3432         continue
   3433 
   3434       if Search(r'sizeof\(.+\)', tok): continue
   3435       if Search(r'arraysize\(\w+\)', tok): continue
   3436 
   3437       tok = tok.lstrip('(')
   3438       tok = tok.rstrip(')')
   3439       if not tok: continue
   3440       if Match(r'\d+', tok): continue
   3441       if Match(r'0[xX][0-9a-fA-F]+', tok): continue
   3442       if Match(r'k[A-Z0-9]\w*', tok): continue
   3443       if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
   3444       if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
   3445       # A catch all for tricky sizeof cases, including 'sizeof expression',
   3446       # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   3447       # requires skipping the next token because we split on ' ' and '*'.
   3448       if tok.startswith('sizeof'):
   3449         skip_next = True
   3450         continue
   3451       is_const = False
   3452       break
   3453     if not is_const:
   3454       error(filename, linenum, 'runtime/arrays', 1,
   3455             'Do not use variable-length arrays.  Use an appropriately named '
   3456             "('k' followed by CamelCase) compile-time constant for the size.")
   3457 
   3458   # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
   3459   # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
   3460   # in the class declaration.
   3461   match = Match(
   3462       (r'\s*'
   3463        r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
   3464        r'\(.*\);$'),
   3465       line)
   3466   if match and linenum + 1 < clean_lines.NumLines():
   3467     next_line = clean_lines.elided[linenum + 1]
   3468     # We allow some, but not all, declarations of variables to be present
   3469     # in the statement that defines the class.  The [\w\*,\s]* fragment of
   3470     # the regular expression below allows users to declare instances of
   3471     # the class or pointers to instances, but not less common types such
   3472     # as function pointers or arrays.  It's a tradeoff between allowing
   3473     # reasonable code and avoiding trying to parse more C++ using regexps.
   3474     if not Search(r'^\s*}[\w\*,\s]*;', next_line):
   3475       error(filename, linenum, 'readability/constructors', 3,
   3476             match.group(1) + ' should be the last thing in the class')
   3477 
   3478   # Check for use of unnamed namespaces in header files.  Registration
   3479   # macros are typically OK, so we allow use of "namespace {" on lines
   3480   # that end with backslashes.
   3481   if (file_extension == 'h'
   3482       and Search(r'\bnamespace\s*{', line)
   3483       and line[-1] != '\\'):
   3484     error(filename, linenum, 'build/namespaces', 4,
   3485           'Do not use unnamed namespaces in header files.  See '
   3486           'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   3487           ' for more information.')
   3488 
   3489 
   3490 def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
   3491                     error):
   3492   """Checks for a C-style cast by looking for the pattern.
   3493 
   3494   This also handles sizeof(type) warnings, due to similarity of content.
   3495 
   3496   Args:
   3497     filename: The name of the current file.
   3498     linenum: The number of the line to check.
   3499     line: The line of code to check.
   3500     raw_line: The raw line of code to check, with comments.
   3501     cast_type: The string for the C++ cast to recommend.  This is either
   3502       reinterpret_cast, static_cast, or const_cast, depending.
   3503     pattern: The regular expression used to find C-style casts.
   3504     error: The function to call with any errors found.
   3505 
   3506   Returns:
   3507     True if an error was emitted.
   3508     False otherwise.
   3509   """
   3510   match = Search(pattern, line)
   3511   if not match:
   3512     return False
   3513 
   3514   # e.g., sizeof(int)
   3515   sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1])
   3516   if sizeof_match:
   3517     error(filename, linenum, 'runtime/sizeof', 1,
   3518           'Using sizeof(type).  Use sizeof(varname) instead if possible')
   3519     return True
   3520 
   3521   # operator++(int) and operator--(int)
   3522   if (line[0:match.start(1) - 1].endswith(' operator++') or
   3523       line[0:match.start(1) - 1].endswith(' operator--')):
   3524     return False
   3525 
   3526   remainder = line[match.end(0):]
   3527 
   3528   # The close paren is for function pointers as arguments to a function.
   3529   # eg, void foo(void (*bar)(int));
   3530   # The semicolon check is a more basic function check; also possibly a
   3531   # function pointer typedef.
   3532   # eg, void foo(int); or void foo(int) const;
   3533   # The equals check is for function pointer assignment.
   3534   # eg, void *(*foo)(int) = ...
   3535   # The > is for MockCallback<...> ...
   3536   #
   3537   # Right now, this will only catch cases where there's a single argument, and
   3538   # it's unnamed.  It should probably be expanded to check for multiple
   3539   # arguments with some unnamed.
   3540   function_match = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)|>))', remainder)
   3541   if function_match:
   3542     if (not function_match.group(3) or
   3543         function_match.group(3) == ';' or
   3544         ('MockCallback<' not in raw_line and
   3545          '/*' not in raw_line)):
   3546       error(filename, linenum, 'readability/function', 3,
   3547             'All parameters should be named in a function')
   3548     return True
   3549 
   3550   # At this point, all that should be left is actual casts.
   3551   error(filename, linenum, 'readability/casting', 4,
   3552         'Using C-style cast.  Use %s<%s>(...) instead' %
   3553         (cast_type, match.group(1)))
   3554 
   3555   return True
   3556 
   3557 
   3558 _HEADERS_CONTAINING_TEMPLATES = (
   3559     ('<deque>', ('deque',)),
   3560     ('<functional>', ('unary_function', 'binary_function',
   3561                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   3562                       'negate',
   3563                       'equal_to', 'not_equal_to', 'greater', 'less',
   3564                       'greater_equal', 'less_equal',
   3565                       'logical_and', 'logical_or', 'logical_not',
   3566                       'unary_negate', 'not1', 'binary_negate', 'not2',
   3567                       'bind1st', 'bind2nd',
   3568                       'pointer_to_unary_function',
   3569                       'pointer_to_binary_function',
   3570                       'ptr_fun',
   3571                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   3572                       'mem_fun_ref_t',
   3573                       'const_mem_fun_t', 'const_mem_fun1_t',
   3574                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   3575                       'mem_fun_ref',
   3576                      )),
   3577     ('<limits>', ('numeric_limits',)),
   3578     ('<list>', ('list',)),
   3579     ('<map>', ('map', 'multimap',)),
   3580     ('<memory>', ('allocator',)),
   3581     ('<queue>', ('queue', 'priority_queue',)),
   3582     ('<set>', ('set', 'multiset',)),
   3583     ('<stack>', ('stack',)),
   3584     ('<string>', ('char_traits', 'basic_string',)),
   3585     ('<utility>', ('pair',)),
   3586     ('<vector>', ('vector',)),
   3587 
   3588     # gcc extensions.
   3589     # Note: std::hash is their hash, ::hash is our hash
   3590     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   3591     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   3592     ('<slist>', ('slist',)),
   3593     )
   3594 
   3595 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   3596 
   3597 _re_pattern_algorithm_header = []
   3598 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   3599                   'transform'):
   3600   # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   3601   # type::max().
   3602   _re_pattern_algorithm_header.append(
   3603       (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   3604        _template,
   3605        '<algorithm>'))
   3606 
   3607 _re_pattern_templates = []
   3608 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   3609   for _template in _templates:
   3610     _re_pattern_templates.append(
   3611         (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   3612          _template + '<>',
   3613          _header))
   3614 
   3615 
   3616 def FilesBelongToSameModule(filename_cc, filename_h):
   3617   """Check if these two filenames belong to the same module.
   3618 
   3619   The concept of a 'module' here is a as follows:
   3620   foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
   3621   same 'module' if they are in the same directory.
   3622   some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   3623   to belong to the same module here.
   3624 
   3625   If the filename_cc contains a longer path than the filename_h, for example,
   3626   '/absolute/path/to/base/sysinfo.cc', and this file would include
   3627   'base/sysinfo.h', this function also produces the prefix needed to open the
   3628   header. This is used by the caller of this function to more robustly open the
   3629   header file. We don't have access to the real include paths in this context,
   3630   so we need this guesswork here.
   3631 
   3632   Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
   3633   according to this implementation. Because of this, this function gives
   3634   some false positives. This should be sufficiently rare in practice.
   3635 
   3636   Args:
   3637     filename_cc: is the path for the .cc file
   3638     filename_h: is the path for the header path
   3639 
   3640   Returns:
   3641     Tuple with a bool and a string:
   3642     bool: True if filename_cc and filename_h belong to the same module.
   3643     string: the additional prefix needed to open the header file.
   3644   """
   3645 
   3646   if not filename_cc.endswith('.cc'):
   3647     return (False, '')
   3648   filename_cc = filename_cc[:-len('.cc')]
   3649   if filename_cc.endswith('_unittest'):
   3650     filename_cc = filename_cc[:-len('_unittest')]
   3651   elif filename_cc.endswith('_test'):
   3652     filename_cc = filename_cc[:-len('_test')]
   3653   filename_cc = filename_cc.replace('/public/', '/')
   3654   filename_cc = filename_cc.replace('/internal/', '/')
   3655 
   3656   if not filename_h.endswith('.h'):
   3657     return (False, '')
   3658   filename_h = filename_h[:-len('.h')]
   3659   if filename_h.endswith('-inl'):
   3660     filename_h = filename_h[:-len('-inl')]
   3661   filename_h = filename_h.replace('/public/', '/')
   3662   filename_h = filename_h.replace('/internal/', '/')
   3663 
   3664   files_belong_to_same_module = filename_cc.endswith(filename_h)
   3665   common_path = ''
   3666   if files_belong_to_same_module:
   3667     common_path = filename_cc[:-len(filename_h)]
   3668   return files_belong_to_same_module, common_path
   3669 
   3670 
   3671 def UpdateIncludeState(filename, include_state, io=codecs):
   3672   """Fill up the include_state with new includes found from the file.
   3673 
   3674   Args:
   3675     filename: the name of the header to read.
   3676     include_state: an _IncludeState instance in which the headers are inserted.
   3677     io: The io factory to use to read the file. Provided for testability.
   3678 
   3679   Returns:
   3680     True if a header was succesfully added. False otherwise.
   3681   """
   3682   headerfile = None
   3683   try:
   3684     headerfile = io.open(filename, 'r', 'utf8', 'replace')
   3685   except IOError:
   3686     return False
   3687   linenum = 0
   3688   for line in headerfile:
   3689     linenum += 1
   3690     clean_line = CleanseComments(line)
   3691     match = _RE_PATTERN_INCLUDE.search(clean_line)
   3692     if match:
   3693       include = match.group(2)
   3694       # The value formatting is cute, but not really used right now.
   3695       # What matters here is that the key is in include_state.
   3696       include_state.setdefault(include, '%s:%d' % (filename, linenum))
   3697   return True
   3698 
   3699 
   3700 def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
   3701                               io=codecs):
   3702   """Reports for missing stl includes.
   3703 
   3704   This function will output warnings to make sure you are including the headers
   3705   necessary for the stl containers and functions that you use. We only give one
   3706   reason to include a header. For example, if you use both equal_to<> and
   3707   less<> in a .h file, only one (the latter in the file) of these will be
   3708   reported as a reason to include the <functional>.
   3709 
   3710   Args:
   3711     filename: The name of the current file.
   3712     clean_lines: A CleansedLines instance containing the file.
   3713     include_state: An _IncludeState instance.
   3714     error: The function to call with any errors found.
   3715     io: The IO factory to use to read the header file. Provided for unittest
   3716         injection.
   3717   """
   3718   required = {}  # A map of header name to linenumber and the template entity.
   3719                  # Example of required: { '<functional>': (1219, 'less<>') }
   3720 
   3721   for linenum in xrange(clean_lines.NumLines()):
   3722     line = clean_lines.elided[linenum]
   3723     if not line or line[0] == '#':
   3724       continue
   3725 
   3726     # String is special -- it is a non-templatized type in STL.
   3727     matched = _RE_PATTERN_STRING.search(line)
   3728     if matched:
   3729       # Don't warn about strings in non-STL namespaces:
   3730       # (We check only the first match per line; good enough.)
   3731       prefix = line[:matched.start()]
   3732       if prefix.endswith('std::') or not prefix.endswith('::'):
   3733         required['<string>'] = (linenum, 'string')
   3734 
   3735     for pattern, template, header in _re_pattern_algorithm_header:
   3736       if pattern.search(line):
   3737         required[header] = (linenum, template)
   3738 
   3739     # The following function is just a speed up, no semantics are changed.
   3740     if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   3741       continue
   3742 
   3743     for pattern, template, header in _re_pattern_templates:
   3744       if pattern.search(line):
   3745         required[header] = (linenum, template)
   3746 
   3747   # The policy is that if you #include something in foo.h you don't need to
   3748   # include it again in foo.cc. Here, we will look at possible includes.
   3749   # Let's copy the include_state so it is only messed up within this function.
   3750   include_state = include_state.copy()
   3751 
   3752   # Did we find the header for this file (if any) and succesfully load it?
   3753   header_found = False
   3754 
   3755   # Use the absolute path so that matching works properly.
   3756   abs_filename = FileInfo(filename).FullName()
   3757 
   3758   # For Emacs's flymake.
   3759   # If cpplint is invoked from Emacs's flymake, a temporary file is generated
   3760   # by flymake and that file name might end with '_flymake.cc'. In that case,
   3761   # restore original file name here so that the corresponding header file can be
   3762   # found.
   3763   # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
   3764   # instead of 'foo_flymake.h'
   3765   abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
   3766 
   3767   # include_state is modified during iteration, so we iterate over a copy of
   3768   # the keys.
   3769   header_keys = include_state.keys()
   3770   for header in header_keys:
   3771     (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
   3772     fullpath = common_path + header
   3773     if same_module and UpdateIncludeState(fullpath, include_state, io):
   3774       header_found = True
   3775 
   3776   # If we can't find the header file for a .cc, assume it's because we don't
   3777   # know where to look. In that case we'll give up as we're not sure they
   3778   # didn't include it in the .h file.
   3779   # TODO(unknown): Do a better job of finding .h files so we are confident that
   3780   # not having the .h file means there isn't one.
   3781   if filename.endswith('.cc') and not header_found:
   3782     return
   3783 
   3784   # All the lines have been processed, report the errors found.
   3785   for required_header_unstripped in required:
   3786     template = required[required_header_unstripped][1]
   3787     if required_header_unstripped.strip('<>"') not in include_state:
   3788       error(filename, required[required_header_unstripped][0],
   3789             'build/include_what_you_use', 4,
   3790             'Add #include ' + required_header_unstripped + ' for ' + template)
   3791 
   3792 
   3793 _RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
   3794 
   3795 
   3796 def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
   3797   """Check that make_pair's template arguments are deduced.
   3798 
   3799   G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
   3800   specified explicitly, and such use isn't intended in any case.
   3801 
   3802   Args:
   3803     filename: The name of the current file.
   3804     clean_lines: A CleansedLines instance containing the file.
   3805     linenum: The number of the line to check.
   3806     error: The function to call with any errors found.
   3807   """
   3808   raw = clean_lines.raw_lines
   3809   line = raw[linenum]
   3810   match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
   3811   if match:
   3812     error(filename, linenum, 'build/explicit_make_pair',
   3813           4,  # 4 = high confidence
   3814           'For C++11-compatibility, omit template arguments from make_pair'
   3815           ' OR use pair directly OR if appropriate, construct a pair directly')
   3816 
   3817 
   3818 def ProcessLine(filename, file_extension, clean_lines, line,
   3819                 include_state, function_state, nesting_state, error,
   3820                 extra_check_functions=[]):
   3821   """Processes a single line in the file.
   3822 
   3823   Args:
   3824     filename: Filename of the file that is being processed.
   3825     file_extension: The extension (dot not included) of the file.
   3826     clean_lines: An array of strings, each representing a line of the file,
   3827                  with comments stripped.
   3828     line: Number of line being processed.
   3829     include_state: An _IncludeState instance in which the headers are inserted.
   3830     function_state: A _FunctionState instance which counts function lines, etc.
   3831     nesting_state: A _NestingState instance which maintains information about
   3832                    the current stack of nested blocks being parsed.
   3833     error: A callable to which errors are reported, which takes 4 arguments:
   3834            filename, line number, error level, and message
   3835     extra_check_functions: An array of additional check functions that will be
   3836                            run on each source line. Each function takes 4
   3837                            arguments: filename, clean_lines, line, error
   3838   """
   3839   raw_lines = clean_lines.raw_lines
   3840   ParseNolintSuppressions(filename, raw_lines[line], line, error)
   3841   nesting_state.Update(filename, clean_lines, line, error)
   3842   if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
   3843     return
   3844   CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
   3845   CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
   3846   CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
   3847   CheckLanguage(filename, clean_lines, line, file_extension, include_state,
   3848                 error)
   3849   CheckForNonStandardConstructs(filename, clean_lines, line,
   3850                                 nesting_state, error)
   3851   CheckPosixThreading(filename, clean_lines, line, error)
   3852   CheckInvalidIncrement(filename, clean_lines, line, error)
   3853   CheckMakePairUsesDeduction(filename, clean_lines, line, error)
   3854   for check_fn in extra_check_functions:
   3855     check_fn(filename, clean_lines, line, error)
   3856 
   3857 def ProcessFileData(filename, file_extension, lines, error,
   3858                     extra_check_functions=[]):
   3859   """Performs lint checks and reports any errors to the given error function.
   3860 
   3861   Args:
   3862     filename: Filename of the file that is being processed.
   3863     file_extension: The extension (dot not included) of the file.
   3864     lines: An array of strings, each representing a line of the file, with the
   3865            last element being empty if the file is terminated with a newline.
   3866     error: A callable to which errors are reported, which takes 4 arguments:
   3867            filename, line number, error level, and message
   3868     extra_check_functions: An array of additional check functions that will be
   3869                            run on each source line. Each function takes 4
   3870                            arguments: filename, clean_lines, line, error
   3871   """
   3872   lines = (['// marker so line numbers and indices both start at 1'] + lines +
   3873            ['// marker so line numbers end in a known way'])
   3874 
   3875   include_state = _IncludeState()
   3876   function_state = _FunctionState()
   3877   nesting_state = _NestingState()
   3878 
   3879   ResetNolintSuppressions()
   3880 
   3881   CheckForCopyright(filename, lines, error)
   3882 
   3883   if file_extension == 'h':
   3884     CheckForHeaderGuard(filename, lines, error)
   3885 
   3886   RemoveMultiLineComments(filename, lines, error)
   3887   clean_lines = CleansedLines(lines)
   3888   for line in xrange(clean_lines.NumLines()):
   3889     ProcessLine(filename, file_extension, clean_lines, line,
   3890                 include_state, function_state, nesting_state, error,
   3891                 extra_check_functions)
   3892   nesting_state.CheckClassFinished(filename, error)
   3893 
   3894   CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
   3895 
   3896   # We check here rather than inside ProcessLine so that we see raw
   3897   # lines rather than "cleaned" lines.
   3898   CheckForUnicodeReplacementCharacters(filename, lines, error)
   3899 
   3900   CheckForNewlineAtEOF(filename, lines, error)
   3901 
   3902 def ProcessFile(filename, vlevel, extra_check_functions=[]):
   3903   """Does google-lint on a single file.
   3904 
   3905   Args:
   3906     filename: The name of the file to parse.
   3907 
   3908     vlevel: The level of errors to report.  Every error of confidence
   3909     >= verbose_level will be reported.  0 is a good default.
   3910 
   3911     extra_check_functions: An array of additional check functions that will be
   3912                            run on each source line. Each function takes 4
   3913                            arguments: filename, clean_lines, line, error
   3914   """
   3915 
   3916   _SetVerboseLevel(vlevel)
   3917 # BEGIN android-added
   3918   old_errors = _cpplint_state.error_count
   3919 # END android-added
   3920 
   3921   try:
   3922     # Support the UNIX convention of using "-" for stdin.  Note that
   3923     # we are not opening the file with universal newline support
   3924     # (which codecs doesn't support anyway), so the resulting lines do
   3925     # contain trailing '\r' characters if we are reading a file that
   3926     # has CRLF endings.
   3927     # If after the split a trailing '\r' is present, it is removed
   3928     # below. If it is not expected to be present (i.e. os.linesep !=
   3929     # '\r\n' as in Windows), a warning is issued below if this file
   3930     # is processed.
   3931 
   3932     if filename == '-':
   3933       lines = codecs.StreamReaderWriter(sys.stdin,
   3934                                         codecs.getreader('utf8'),
   3935                                         codecs.getwriter('utf8'),
   3936                                         'replace').read().split('\n')
   3937     else:
   3938       lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
   3939 
   3940     carriage_return_found = False
   3941     # Remove trailing '\r'.
   3942     for linenum in range(len(lines)):
   3943       if lines[linenum].endswith('\r'):
   3944         lines[linenum] = lines[linenum].rstrip('\r')
   3945         carriage_return_found = True
   3946 
   3947   except IOError:
   3948     sys.stderr.write(
   3949         "Skipping input '%s': Can't open for reading\n" % filename)
   3950     return
   3951 
   3952   # Note, if no dot is found, this will give the entire filename as the ext.
   3953   file_extension = filename[filename.rfind('.') + 1:]
   3954 
   3955   # When reading from stdin, the extension is unknown, so no cpplint tests
   3956   # should rely on the extension.
   3957   if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
   3958       and file_extension != 'cpp'):
   3959     sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
   3960   else:
   3961     ProcessFileData(filename, file_extension, lines, Error,
   3962                     extra_check_functions)
   3963     if carriage_return_found and os.linesep != '\r\n':
   3964       # Use 0 for linenum since outputting only one error for potentially
   3965       # several lines.
   3966       Error(filename, 0, 'whitespace/newline', 1,
   3967             'One or more unexpected \\r (^M) found;'
   3968             'better to use only a \\n')
   3969 
   3970 # BEGIN android-changed
   3971   # sys.stderr.write('Done processing %s\n' % filename)
   3972   if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count:
   3973     sys.stderr.write('Done processing %s\n' % filename)
   3974 # END android-changed
   3975 
   3976 def PrintUsage(message):
   3977   """Prints a brief usage string and exits, optionally with an error message.
   3978 
   3979   Args:
   3980     message: The optional error message.
   3981   """
   3982   sys.stderr.write(_USAGE)
   3983   if message:
   3984     sys.exit('\nFATAL ERROR: ' + message)
   3985   else:
   3986     sys.exit(1)
   3987 
   3988 
   3989 def PrintCategories():
   3990   """Prints a list of all the error-categories used by error messages.
   3991 
   3992   These are the categories used to filter messages via --filter.
   3993   """
   3994   sys.stderr.write(''.join('  %s\n' % cat for cat in _ERROR_CATEGORIES))
   3995   sys.exit(0)
   3996 
   3997 
   3998 def ParseArguments(args):
   3999   """Parses the command line arguments.
   4000 
   4001   This may set the output format and verbosity level as side-effects.
   4002 
   4003   Args:
   4004     args: The command line arguments:
   4005 
   4006   Returns:
   4007     The list of filenames to lint.
   4008   """
   4009   try:
   4010     (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
   4011                                                  'stdout', # TODO(enh): added --stdout
   4012                                                  # BEGIN android-added
   4013                                                  'quiet',
   4014                                                  # END android-added
   4015                                                  'counting=',
   4016                                                  'filter=',
   4017                                                  'root='])
   4018   except getopt.GetoptError:
   4019     PrintUsage('Invalid arguments.')
   4020 
   4021   verbosity = _VerboseLevel()
   4022   output_format = _OutputFormat()
   4023   output_stream = sys.stderr # TODO(enh): added --stdout
   4024   filters = ''
   4025   # BEGIN android-added
   4026   quiet = _Quiet()
   4027   # END android-added
   4028   counting_style = ''
   4029 
   4030   for (opt, val) in opts:
   4031     if opt == '--help':
   4032       PrintUsage(None)
   4033     elif opt == '--stdout': # TODO(enh): added --stdout
   4034       output_stream = sys.stdout # TODO(enh): added --stdout
   4035     # BEGIN android-added
   4036     elif opt == '--quiet':
   4037       quiet = True
   4038     # END android-added
   4039     elif opt == '--output':
   4040       if not val in ('emacs', 'vs7', 'eclipse'):
   4041         PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
   4042       output_format = val
   4043     elif opt == '--verbose':
   4044       verbosity = int(val)
   4045     elif opt == '--filter':
   4046       filters = val
   4047       if not filters:
   4048         PrintCategories()
   4049     elif opt == '--counting':
   4050       if val not in ('total', 'toplevel', 'detailed'):
   4051         PrintUsage('Valid counting options are total, toplevel, and detailed')
   4052       counting_style = val
   4053     elif opt == '--root':
   4054       global _root
   4055       _root = val
   4056 
   4057   if not filenames:
   4058     PrintUsage('No files were specified.')
   4059 
   4060   _SetOutputFormat(output_format)
   4061   _SetVerboseLevel(verbosity)
   4062   _SetFilters(filters)
   4063   _SetCountingStyle(counting_style)
   4064   # BEGIN android-added
   4065   _SetQuiet(quiet)
   4066   # END android-added
   4067   sys.stderr = output_stream # TODO(enh): added --stdout
   4068 
   4069   return filenames
   4070 
   4071 
   4072 def main():
   4073   filenames = ParseArguments(sys.argv[1:])
   4074 
   4075   # Change stderr to write with replacement characters so we don't die
   4076   # if we try to print something containing non-ASCII characters.
   4077   sys.stderr = codecs.StreamReaderWriter(sys.stderr,
   4078                                          codecs.getreader('utf8'),
   4079                                          codecs.getwriter('utf8'),
   4080                                          'replace')
   4081 
   4082   _cpplint_state.ResetErrorCounts()
   4083   for filename in filenames:
   4084     ProcessFile(filename, _cpplint_state.verbose_level)
   4085   # BEGIN android-changed
   4086   # _cpplint_state.PrintErrorCounts()
   4087   if not _cpplint_state.quiet or _cpplint_state.error_count > 0:
   4088     _cpplint_state.PrintErrorCounts()
   4089   # END android-changed
   4090 
   4091   sys.exit(_cpplint_state.error_count > 0)
   4092 
   4093 
# Run the linter only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()
   4096