Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/env python
      2 #
      3 # Copyright (c) 2009 Google Inc. All rights reserved.
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #    * Redistributions of source code must retain the above copyright
     10 # notice, this list of conditions and the following disclaimer.
     11 #    * Redistributions in binary form must reproduce the above
     12 # copyright notice, this list of conditions and the following disclaimer
     13 # in the documentation and/or other materials provided with the
     14 # distribution.
     15 #    * Neither the name of Google Inc. nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 # Here are some issues that I've had people identify in my code during reviews,
     32 # that I think are possible to flag automatically in a lint tool.  If these were
     33 # caught by lint, it would save time both for myself and that of my reviewers.
     34 # Most likely, some of these are beyond the scope of the current lint framework,
     35 # but I think it is valuable to retain these wish-list items even if they cannot
     36 # be immediately implemented.
     37 #
     38 #  Suggestions
     39 #  -----------
     40 #  - Check for no 'explicit' for multi-arg ctor
     41 #  - Check for boolean assign RHS in parens
     42 #  - Check for ctor initializer-list colon position and spacing
     43 #  - Check that if there's a ctor, there should be a dtor
     44 #  - Check accessors that return non-pointer member variables are
     45 #    declared const
     46 #  - Check accessors that return non-const pointer member vars are
     47 #    *not* declared const
     48 #  - Check for using public includes for testing
     49 #  - Check for spaces between brackets in one-line inline method
     50 #  - Check for no assert()
     51 #  - Check for spaces surrounding operators
     52 #  - Check for 0 in pointer context (should be NULL)
     53 #  - Check for 0 in char context (should be '\0')
     54 #  - Check for camel-case method name conventions for methods
     55 #    that are not simple inline getters and setters
     56 #  - Do not indent namespace contents
     57 #  - Avoid inlining non-trivial constructors in header files
     58 #  - Check for old-school (void) cast for call-sites of functions
     59 #    ignored return value
     60 #  - Check gUnit usage of anonymous namespace
     61 #  - Check for class declaration order (typedefs, consts, enums,
     62 #    ctor(s?), dtor, friend declarations, methods, member vars)
     63 #
     64 
     65 """Does google-lint on c++ files.
     66 
     67 The goal of this script is to identify places in the code that *may*
     68 be in non-compliance with google style.  It does not attempt to fix
     69 up these problems -- the point is to educate.  It does also not
     70 attempt to find all problems, or to ensure that everything it does
     71 find is legitimately a problem.
     72 
     73 In particular, we can get very confused by /* and // inside strings!
     74 We do a small hack, which is to ignore //'s with "'s after them on the
     75 same line, but it is far from perfect (in either direction).
     76 """
     77 
     78 import codecs
     79 import copy
     80 import getopt
     81 import math  # for log
     82 import os
     83 import re
     84 import sre_compile
     85 import string
     86 import sys
     87 import unicodedata
     88 
     89 
     90 _USAGE = """
     91 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
     92                    [--counting=total|toplevel|detailed]
     93         <file> [file] ...
     94 
     95   The style guidelines this tries to follow are those in
     96     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml
     97 
     98   Every problem is given a confidence score from 1-5, with 5 meaning we are
     99   certain of the problem, and 1 meaning it could be a legitimate construct.
    100   This will miss some errors, and is not a substitute for a code review.
    101 
    102   To suppress false-positive errors of a certain category, add a
    103   'NOLINT(category)' comment to the line.  NOLINT or NOLINT(*)
    104   suppresses errors of all categories on that line.
    105 
    106   The files passed in will be linted; at least one file must be provided.
    107   Linted extensions are .cc, .cpp, and .h.  Other file types will be ignored.
    108 
    109   Flags:
    110 
    111     output=vs7
    112       By default, the output is formatted to ease emacs parsing.  Visual Studio
    113       compatible output (vs7) may also be used.  Other formats are unsupported.
    114 
    115     verbose=#
    116       Specify a number 0-5 to restrict errors to certain verbosity levels.
    117 
    118     filter=-x,+y,...
    119       Specify a comma-separated list of category-filters to apply: only
    120       error messages whose category names pass the filters will be printed.
    121       (Category names are printed with the message and look like
    122       "[whitespace/indent]".)  Filters are evaluated left to right.
    123       "-FOO" and "FOO" means "do not print categories that start with FOO".
    124       "+FOO" means "do print categories that start with FOO".
    125 
    126       Examples: --filter=-whitespace,+whitespace/braces
    127                 --filter=whitespace,runtime/printf,+runtime/printf_format
    128                 --filter=-,+build/include_what_you_use
    129 
    130       To see a list of all the categories used in cpplint, pass no arg:
    131          --filter=
    132 
    133     counting=total|toplevel|detailed
    134       The total number of errors found is always printed. If
    135       'toplevel' is provided, then the count of errors in each of
    136       the top-level categories like 'build' and 'whitespace' will
    137       also be printed. If 'detailed' is provided, then a count
    138       is provided for each category like 'build/class'.
    139 
    140     root=subdir
    141       The root directory used for deriving header guard CPP variable.
    142       By default, the header guard CPP variable is calculated as the relative
    143       path to the directory that contains .git, .hg, or .svn.  When this flag
    144       is specified, the relative path is calculated from the specified
    145       directory. If the specified directory does not exist, this flag is
    146       ignored.
    147 
    148       Examples:
    149         Assuing that src/.git exists, the header guard CPP variables for
    150         src/chrome/browser/ui/browser.h are:
    151 
    152         No flag => CHROME_BROWSER_UI_BROWSER_H_
    153         --root=chrome => BROWSER_UI_BROWSER_H_
    154         --root=chrome/browser => UI_BROWSER_H_
    155 """
    156 
    157 # We categorize each error message we print.  Here are the categories.
    158 # We want an explicit list so we can list them all in cpplint --filter=.
    159 # If you add a new error message with a new category, add it to the list
    160 # here!  cpplint_unittest.py should tell you if you forget to do this.
    161 # \ used for clearer layout -- pylint: disable-msg=C6013
    162 _ERROR_CATEGORIES = [
    163   'build/class',
    164   'build/deprecated',
    165   'build/endif_comment',
    166   'build/explicit_make_pair',
    167   'build/forward_decl',
    168   'build/header_guard',
    169   'build/include',
    170   'build/include_alpha',
    171   'build/include_order',
    172   'build/include_what_you_use',
    173   'build/namespaces',
    174   'build/printf_format',
    175   'build/storage_class',
    176   'legal/copyright',
    177   'readability/alt_tokens',
    178   'readability/braces',
    179   'readability/casting',
    180   'readability/check',
    181   'readability/constructors',
    182   'readability/fn_size',
    183   'readability/function',
    184   'readability/multiline_comment',
    185   'readability/multiline_string',
    186   'readability/namespace',
    187   'readability/nolint',
    188   'readability/streams',
    189   'readability/todo',
    190   'readability/utf8',
    191   'runtime/arrays',
    192   'runtime/casting',
    193   'runtime/explicit',
    194   'runtime/int',
    195   'runtime/init',
    196   'runtime/invalid_increment',
    197   'runtime/member_string_references',
    198   'runtime/memset',
    199   'runtime/operator',
    200   'runtime/printf',
    201   'runtime/printf_format',
    202   'runtime/references',
    203   'runtime/rtti',
    204   'runtime/sizeof',
    205   'runtime/string',
    206   'runtime/threadsafe_fn',
    207   'whitespace/blank_line',
    208   'whitespace/braces',
    209   'whitespace/comma',
    210   'whitespace/comments',
    211   'whitespace/empty_loop_body',
    212   'whitespace/end_of_line',
    213   'whitespace/ending_newline',
    214   'whitespace/forcolon',
    215   'whitespace/indent',
    216   'whitespace/labels',
    217   'whitespace/line_length',
    218   'whitespace/newline',
    219   'whitespace/operators',
    220   'whitespace/parens',
    221   'whitespace/semicolon',
    222   'whitespace/tab',
    223   'whitespace/todo'
    224   ]
    225 
    226 # The default state of the category filter. This is overridden by the --filter=
    227 # flag. By default all errors are on, so only add here categories that should be
    228 # off by default (i.e., categories that must be enabled by the --filter= flags).
    229 # All entries here should start with a '-' or '+', as in the --filter= flag.
    230 _DEFAULT_FILTERS = ['-build/include_alpha']
    231 
    232 # We used to check for high-bit characters, but after much discussion we
    233 # decided those were OK, as long as they were in UTF-8 and didn't represent
    234 # hard-coded international strings, which belong in a separate i18n file.
    235 
    236 # Headers that we consider STL headers.
    237 _STL_HEADERS = frozenset([
    238     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
    239     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
    240     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'new',
    241     'pair.h', 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
    242     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
    243     'utility', 'vector', 'vector.h',
    244     ])
    245 
    246 
    247 # Non-STL C++ system headers.
    248 _CPP_HEADERS = frozenset([
    249     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
    250     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
    251     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
    252     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
    253     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
    254     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
    255     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream',
    256     'istream.h', 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
    257     'numeric', 'ostream', 'ostream.h', 'parsestream.h', 'pfstream.h',
    258     'PlotFile.h', 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h',
    259     'ropeimpl.h', 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
    260     'stdiostream.h', 'streambuf', 'streambuf.h', 'stream.h', 'strfile.h',
    261     'string', 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo',
    262     'valarray',
    263     ])
    264 
    265 
    266 # Assertion macros.  These are defined in base/logging.h and
    267 # testing/base/gunit.h.  Note that the _M versions need to come first
    268 # for substring matching to work.
    269 _CHECK_MACROS = [
    270     'DCHECK', 'CHECK',
    271     'EXPECT_TRUE_M', 'EXPECT_TRUE',
    272     'ASSERT_TRUE_M', 'ASSERT_TRUE',
    273     'EXPECT_FALSE_M', 'EXPECT_FALSE',
    274     'ASSERT_FALSE_M', 'ASSERT_FALSE',
    275     ]
    276 
    277 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
    278 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
    279 
    280 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
    281                         ('>=', 'GE'), ('>', 'GT'),
    282                         ('<=', 'LE'), ('<', 'LT')]:
    283   _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
    284   _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
    285   _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
    286   _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
    287   _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
    288   _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
    289 
    290 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
    291                             ('>=', 'LT'), ('>', 'LE'),
    292                             ('<=', 'GT'), ('<', 'GE')]:
    293   _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
    294   _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
    295   _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
    296   _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
    297 
    298 # Alternative tokens and their replacements.  For full list, see section 2.5
    299 # Alternative tokens [lex.digraph] in the C++ standard.
    300 #
    301 # Digraphs (such as '%:') are not included here since it's a mess to
    302 # match those on a word boundary.
    303 _ALT_TOKEN_REPLACEMENT = {
    304     'and': '&&',
    305     'bitor': '|',
    306     'or': '||',
    307     'xor': '^',
    308     'compl': '~',
    309     'bitand': '&',
    310     'and_eq': '&=',
    311     'or_eq': '|=',
    312     'xor_eq': '^=',
    313     'not': '!',
    314     'not_eq': '!='
    315     }
    316 
    317 # Compile regular expression that matches all the above keywords.  The "[ =()]"
    318 # bit is meant to avoid matching these keywords outside of boolean expressions.
    319 #
    320 # False positives include C-style multi-line comments (http://go/nsiut )
    321 # and multi-line strings (http://go/beujw ), but those have always been
    322 # troublesome for cpplint.
    323 _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile(
    324     r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)')
    325 
    326 
    327 # These constants define types of headers for use with
    328 # _IncludeState.CheckNextIncludeOrder().
    329 _C_SYS_HEADER = 1
    330 _CPP_SYS_HEADER = 2
    331 _LIKELY_MY_HEADER = 3
    332 _POSSIBLE_MY_HEADER = 4
    333 _OTHER_HEADER = 5
    334 
    335 # These constants define the current inline assembly state
    336 _NO_ASM = 0       # Outside of inline assembly block
    337 _INSIDE_ASM = 1   # Inside inline assembly block
    338 _END_ASM = 2      # Last line of inline assembly block
    339 _BLOCK_ASM = 3    # The whole block is an inline assembly block
    340 
    341 # Match start of assembly blocks
    342 _MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)'
    343                         r'(?:\s+(volatile|__volatile__))?'
    344                         r'\s*[{(]')
    345 
    346 
    347 _regexp_compile_cache = {}
    348 
    349 # Finds occurrences of NOLINT or NOLINT(...).
    350 _RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?')
    351 
    352 # {str, set(int)}: a map from error categories to sets of linenumbers
    353 # on which those errors are expected and should be suppressed.
    354 _error_suppressions = {}
    355 
    356 # The root directory used for deriving header guard CPP variable.
    357 # This is set by --root flag.
    358 _root = None
    359 
    360 def ParseNolintSuppressions(filename, raw_line, linenum, error):
    361   """Updates the global list of error-suppressions.
    362 
    363   Parses any NOLINT comments on the current line, updating the global
    364   error_suppressions store.  Reports an error if the NOLINT comment
    365   was malformed.
    366 
    367   Args:
    368     filename: str, the name of the input file.
    369     raw_line: str, the line of input text, with comments.
    370     linenum: int, the number of the current line.
    371     error: function, an error handler.
    372   """
    373   # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*).
    374   matched = _RE_SUPPRESSION.search(raw_line)
    375   if matched:
    376     category = matched.group(1)
    377     if category in (None, '(*)'):  # => "suppress all"
    378       _error_suppressions.setdefault(None, set()).add(linenum)
    379     else:
    380       if category.startswith('(') and category.endswith(')'):
    381         category = category[1:-1]
    382         if category in _ERROR_CATEGORIES:
    383           _error_suppressions.setdefault(category, set()).add(linenum)
    384         else:
    385           error(filename, linenum, 'readability/nolint', 5,
    386                 'Unknown NOLINT error category: %s' % category)
    387 
    388 
def ResetNolintSuppressions():
  """Resets the set of NOLINT suppressions to empty.

  Clears the module-level _error_suppressions map in place.
  """
  _error_suppressions.clear()
    392 
    393 
    394 def IsErrorSuppressedByNolint(category, linenum):
    395   """Returns true if the specified error category is suppressed on this line.
    396 
    397   Consults the global error_suppressions map populated by
    398   ParseNolintSuppressions/ResetNolintSuppressions.
    399 
    400   Args:
    401     category: str, the category of the error.
    402     linenum: int, the current line number.
    403   Returns:
    404     bool, True iff the error should be suppressed due to a NOLINT comment.
    405   """
    406   return (linenum in _error_suppressions.get(category, set()) or
    407           linenum in _error_suppressions.get(None, set()))
    408 
    409 def Match(pattern, s):
    410   """Matches the string with the pattern, caching the compiled regexp."""
    411   # The regexp compilation caching is inlined in both Match and Search for
    412   # performance reasons; factoring it out into a separate function turns out
    413   # to be noticeably expensive.
    414   if not pattern in _regexp_compile_cache:
    415     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    416   return _regexp_compile_cache[pattern].match(s)
    417 
    418 
    419 def Search(pattern, s):
    420   """Searches the string for the pattern, caching the compiled regexp."""
    421   if not pattern in _regexp_compile_cache:
    422     _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
    423   return _regexp_compile_cache[pattern].search(s)
    424 
    425 
    426 class _IncludeState(dict):
    427   """Tracks line numbers for includes, and the order in which includes appear.
    428 
    429   As a dict, an _IncludeState object serves as a mapping between include
    430   filename and line number on which that file was included.
    431 
    432   Call CheckNextIncludeOrder() once for each header in the file, passing
    433   in the type constants defined above. Calls in an illegal order will
    434   raise an _IncludeError with an appropriate error message.
    435 
    436   """
    437   # self._section will move monotonically through this set. If it ever
    438   # needs to move backwards, CheckNextIncludeOrder will raise an error.
    439   _INITIAL_SECTION = 0
    440   _MY_H_SECTION = 1
    441   _C_SECTION = 2
    442   _CPP_SECTION = 3
    443   _OTHER_H_SECTION = 4
    444 
    445   _TYPE_NAMES = {
    446       _C_SYS_HEADER: 'C system header',
    447       _CPP_SYS_HEADER: 'C++ system header',
    448       _LIKELY_MY_HEADER: 'header this file implements',
    449       _POSSIBLE_MY_HEADER: 'header this file may implement',
    450       _OTHER_HEADER: 'other header',
    451       }
    452   _SECTION_NAMES = {
    453       _INITIAL_SECTION: "... nothing. (This can't be an error.)",
    454       _MY_H_SECTION: 'a header this file implements',
    455       _C_SECTION: 'C system header',
    456       _CPP_SECTION: 'C++ system header',
    457       _OTHER_H_SECTION: 'other header',
    458       }
    459 
    460   def __init__(self):
    461     dict.__init__(self)
    462     # The name of the current section.
    463     self._section = self._INITIAL_SECTION
    464     # The path of last found header.
    465     self._last_header = ''
    466 
    467   def CanonicalizeAlphabeticalOrder(self, header_path):
    468     """Returns a path canonicalized for alphabetical comparison.
    469 
    470     - replaces "-" with "_" so they both cmp the same.
    471     - removes '-inl' since we don't require them to be after the main header.
    472     - lowercase everything, just in case.
    473 
    474     Args:
    475       header_path: Path to be canonicalized.
    476 
    477     Returns:
    478       Canonicalized path.
    479     """
    480     return header_path.replace('-inl.h', '.h').replace('-', '_').lower()
    481 
    482   def IsInAlphabeticalOrder(self, header_path):
    483     """Check if a header is in alphabetical order with the previous header.
    484 
    485     Args:
    486       header_path: Header to be checked.
    487 
    488     Returns:
    489       Returns true if the header is in alphabetical order.
    490     """
    491     canonical_header = self.CanonicalizeAlphabeticalOrder(header_path)
    492     if self._last_header > canonical_header:
    493       return False
    494     self._last_header = canonical_header
    495     return True
    496 
    497   def CheckNextIncludeOrder(self, header_type):
    498     """Returns a non-empty error message if the next header is out of order.
    499 
    500     This function also updates the internal state to be ready to check
    501     the next include.
    502 
    503     Args:
    504       header_type: One of the _XXX_HEADER constants defined above.
    505 
    506     Returns:
    507       The empty string if the header is in the right order, or an
    508       error message describing what's wrong.
    509 
    510     """
    511     error_message = ('Found %s after %s' %
    512                      (self._TYPE_NAMES[header_type],
    513                       self._SECTION_NAMES[self._section]))
    514 
    515     last_section = self._section
    516 
    517     if header_type == _C_SYS_HEADER:
    518       if self._section <= self._C_SECTION:
    519         self._section = self._C_SECTION
    520       else:
    521         self._last_header = ''
    522         return error_message
    523     elif header_type == _CPP_SYS_HEADER:
    524       if self._section <= self._CPP_SECTION:
    525         self._section = self._CPP_SECTION
    526       else:
    527         self._last_header = ''
    528         return error_message
    529     elif header_type == _LIKELY_MY_HEADER:
    530       if self._section <= self._MY_H_SECTION:
    531         self._section = self._MY_H_SECTION
    532       else:
    533         self._section = self._OTHER_H_SECTION
    534     elif header_type == _POSSIBLE_MY_HEADER:
    535       if self._section <= self._MY_H_SECTION:
    536         self._section = self._MY_H_SECTION
    537       else:
    538         # This will always be the fallback because we're not sure
    539         # enough that the header is associated with this file.
    540         self._section = self._OTHER_H_SECTION
    541     else:
    542       assert header_type == _OTHER_HEADER
    543       self._section = self._OTHER_H_SECTION
    544 
    545     if last_section != self._section:
    546       self._last_header = ''
    547 
    548     return ''
    549 
    550 
    551 class _CppLintState(object):
    552   """Maintains module-wide state.."""
    553 
    554   def __init__(self):
    555     self.verbose_level = 1  # global setting.
    556     self.error_count = 0    # global count of reported errors
    557     # filters to apply when emitting error messages
    558     self.filters = _DEFAULT_FILTERS[:]
    559     self.counting = 'total'  # In what way are we counting errors?
    560     self.errors_by_category = {}  # string to int dict storing error counts
    561 
    562     # output format:
    563     # "emacs" - format that emacs can parse (default)
    564     # "vs7" - format that Microsoft Visual Studio 7 can parse
    565     self.output_format = 'emacs'
    566 
    567   def SetOutputFormat(self, output_format):
    568     """Sets the output format for errors."""
    569     self.output_format = output_format
    570 
    571   def SetVerboseLevel(self, level):
    572     """Sets the module's verbosity, and returns the previous setting."""
    573     last_verbose_level = self.verbose_level
    574     self.verbose_level = level
    575     return last_verbose_level
    576 
    577   def SetCountingStyle(self, counting_style):
    578     """Sets the module's counting options."""
    579     self.counting = counting_style
    580 
    581   def SetFilters(self, filters):
    582     """Sets the error-message filters.
    583 
    584     These filters are applied when deciding whether to emit a given
    585     error message.
    586 
    587     Args:
    588       filters: A string of comma-separated filters (eg "+whitespace/indent").
    589                Each filter should start with + or -; else we die.
    590 
    591     Raises:
    592       ValueError: The comma-separated filters did not all start with '+' or '-'.
    593                   E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter"
    594     """
    595     # Default filters always have less priority than the flag ones.
    596     self.filters = _DEFAULT_FILTERS[:]
    597     for filt in filters.split(','):
    598       clean_filt = filt.strip()
    599       if clean_filt:
    600         self.filters.append(clean_filt)
    601     for filt in self.filters:
    602       if not (filt.startswith('+') or filt.startswith('-')):
    603         raise ValueError('Every filter in --filters must start with + or -'
    604                          ' (%s does not)' % filt)
    605 
    606   def ResetErrorCounts(self):
    607     """Sets the module's error statistic back to zero."""
    608     self.error_count = 0
    609     self.errors_by_category = {}
    610 
    611   def IncrementErrorCount(self, category):
    612     """Bumps the module's error statistic."""
    613     self.error_count += 1
    614     if self.counting in ('toplevel', 'detailed'):
    615       if self.counting != 'detailed':
    616         category = category.split('/')[0]
    617       if category not in self.errors_by_category:
    618         self.errors_by_category[category] = 0
    619       self.errors_by_category[category] += 1
    620 
    621   def PrintErrorCounts(self):
    622     """Print a summary of errors by category, and the total."""
    623     for category, count in self.errors_by_category.iteritems():
    624       sys.stderr.write('Category \'%s\' errors found: %d\n' %
    625                        (category, count))
    626     sys.stderr.write('Total errors found: %d\n' % self.error_count)
    627 
    628 _cpplint_state = _CppLintState()
    629 
    630 
def _OutputFormat():
  """Gets the module's output format.

  Returns:
    The output_format attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.output_format
    634 
    635 
def _SetOutputFormat(output_format):
  """Sets the module's output format.

  Args:
    output_format: The format value, passed unchanged to
      _cpplint_state.SetOutputFormat().
  """
  _cpplint_state.SetOutputFormat(output_format)
    639 
    640 
def _VerboseLevel():
  """Returns the module's verbosity setting.

  Returns:
    The verbose_level attribute of the shared _cpplint_state object.
  """
  return _cpplint_state.verbose_level
    644 
    645 
def _SetVerboseLevel(level):
  """Sets the module's verbosity, and returns the previous setting.

  Args:
    level: The new verbosity value, passed unchanged to
      _cpplint_state.SetVerboseLevel().

  Returns:
    Whatever _cpplint_state.SetVerboseLevel() returns.
  """
  return _cpplint_state.SetVerboseLevel(level)
    649 
    650 
def _SetCountingStyle(level):
  """Sets the module's counting options.

  Args:
    level: The counting style, passed unchanged to
      _cpplint_state.SetCountingStyle().
  """
  _cpplint_state.SetCountingStyle(level)
    654 
    655 
def _Filters():
  """Returns the module's list of output filters, as a list.

  The value returned is the filters attribute of the shared _cpplint_state
  object itself, not a copy.
  """
  return _cpplint_state.filters
    659 
    660 
def _SetFilters(filters):
  """Sets the module's error-message filters.

  These filters are applied when deciding whether to emit a given
  error message.  The work is delegated to _cpplint_state.SetFilters().

  Args:
    filters: A string of comma-separated filters (eg "+whitespace/indent").
             Each filter should start with + or -; else we die.
  """
  _cpplint_state.SetFilters(filters)
    672 
    673 
    674 class _FunctionState(object):
    675   """Tracks current function name and the number of lines in its body."""
    676 
    677   _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
    678   _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
    679 
    680   def __init__(self):
    681     self.in_a_function = False
    682     self.lines_in_function = 0
    683     self.current_function = ''
    684 
    685   def Begin(self, function_name):
    686     """Start analyzing function body.
    687 
    688     Args:
    689       function_name: The name of the function being tracked.
    690     """
    691     self.in_a_function = True
    692     self.lines_in_function = 0
    693     self.current_function = function_name
    694 
    695   def Count(self):
    696     """Count line in current function body."""
    697     if self.in_a_function:
    698       self.lines_in_function += 1
    699 
    700   def Check(self, error, filename, linenum):
    701     """Report if too many lines in function body.
    702 
    703     Args:
    704       error: The function to call with any errors found.
    705       filename: The name of the current file.
    706       linenum: The number of the line to check.
    707     """
    708     # BEGIN android-added
    709     if not self.in_a_function:
    710       return
    711     # END android-added
    712     if Match(r'T(EST|est)', self.current_function):
    713       base_trigger = self._TEST_TRIGGER
    714     else:
    715       base_trigger = self._NORMAL_TRIGGER
    716     trigger = base_trigger * 2**_VerboseLevel()
    717 
    718     if self.lines_in_function > trigger:
    719       error_level = int(math.log(self.lines_in_function / base_trigger, 2))
    720       # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
    721       if error_level > 5:
    722         error_level = 5
    723       error(filename, linenum, 'readability/fn_size', error_level,
    724             'Small and focused functions are preferred:'
    725             ' %s has %d non-comment lines'
    726             ' (error triggered by exceeding %d lines).'  % (
    727                 self.current_function, self.lines_in_function, trigger))
    728 
    729   def End(self):
    730     """Stop analyzing function body."""
    731     self.in_a_function = False
    732 
    733 
    734 class _IncludeError(Exception):
    735   """Indicates a problem with the include order in a file."""
    736   pass
    737 
    738 
    739 class FileInfo:
    740   """Provides utility functions for filenames.
    741 
    742   FileInfo provides easy access to the components of a file's path
    743   relative to the project root.
    744   """
    745 
    746   def __init__(self, filename):
    747     self._filename = filename
    748 
    749   def FullName(self):
    750     """Make Windows paths like Unix."""
    751     return os.path.abspath(self._filename).replace('\\', '/')
    752 
    753   def RepositoryName(self):
    754     """FullName after removing the local path to the repository.
    755 
    756     If we have a real absolute path name here we can try to do something smart:
    757     detecting the root of the checkout and truncating /path/to/checkout from
    758     the name so that we get header guards that don't include things like
    759     "C:\Documents and Settings\..." or "/home/username/..." in them and thus
    760     people on different computers who have checked the source out to different
    761     locations won't see bogus errors.
    762     """
    763     fullname = self.FullName()
    764 
    765     if os.path.exists(fullname):
    766       project_dir = os.path.dirname(fullname)
    767 
    768       if os.path.exists(os.path.join(project_dir, ".svn")):
    769         # If there's a .svn file in the current directory, we recursively look
    770         # up the directory tree for the top of the SVN checkout
    771         root_dir = project_dir
    772         one_up_dir = os.path.dirname(root_dir)
    773         while os.path.exists(os.path.join(one_up_dir, ".svn")):
    774           root_dir = os.path.dirname(root_dir)
    775           one_up_dir = os.path.dirname(one_up_dir)
    776 
    777         prefix = os.path.commonprefix([root_dir, project_dir])
    778         return fullname[len(prefix) + 1:]
    779 
    780       # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by
    781       # searching up from the current path.
    782       root_dir = os.path.dirname(fullname)
    783       while (root_dir != os.path.dirname(root_dir) and
    784              not os.path.exists(os.path.join(root_dir, ".git")) and
    785              not os.path.exists(os.path.join(root_dir, ".hg")) and
    786              not os.path.exists(os.path.join(root_dir, ".svn"))):
    787         root_dir = os.path.dirname(root_dir)
    788 
    789       if (os.path.exists(os.path.join(root_dir, ".git")) or
    790           os.path.exists(os.path.join(root_dir, ".hg")) or
    791           os.path.exists(os.path.join(root_dir, ".svn"))):
    792         prefix = os.path.commonprefix([root_dir, project_dir])
    793         # BEGIN android-changed
    794         # return fullname[len(prefix) + 1:]
    795         return "art/" + fullname[len(prefix) + 1:]
    796         # END android-changed
    797 
    798     # Don't know what to do; header guard warnings may be wrong...
    799     return fullname
    800 
    801   def Split(self):
    802     """Splits the file into the directory, basename, and extension.
    803 
    804     For 'chrome/browser/browser.cc', Split() would
    805     return ('chrome/browser', 'browser', '.cc')
    806 
    807     Returns:
    808       A tuple of (directory, basename, extension).
    809     """
    810 
    811     googlename = self.RepositoryName()
    812     project, rest = os.path.split(googlename)
    813     return (project,) + os.path.splitext(rest)
    814 
    815   def BaseName(self):
    816     """File base name - text after the final slash, before the final period."""
    817     return self.Split()[1]
    818 
    819   def Extension(self):
    820     """File extension - text following the final period."""
    821     return self.Split()[2]
    822 
    823   def NoExtension(self):
    824     """File has no source file extension."""
    825     return '/'.join(self.Split()[0:2])
    826 
    827   def IsSource(self):
    828     """File has a source file extension."""
    829     return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
    830 
    831 
    832 def _ShouldPrintError(category, confidence, linenum):
    833   """If confidence >= verbose, category passes filter and is not suppressed."""
    834 
    835   # There are three ways we might decide not to print an error message:
    836   # a "NOLINT(category)" comment appears in the source,
    837   # the verbosity level isn't high enough, or the filters filter it out.
    838   if IsErrorSuppressedByNolint(category, linenum):
    839     return False
    840   if confidence < _cpplint_state.verbose_level:
    841     return False
    842 
    843   is_filtered = False
    844   for one_filter in _Filters():
    845     if one_filter.startswith('-'):
    846       if category.startswith(one_filter[1:]):
    847         is_filtered = True
    848     elif one_filter.startswith('+'):
    849       if category.startswith(one_filter[1:]):
    850         is_filtered = False
    851     else:
    852       assert False  # should have been checked for in SetFilter.
    853   if is_filtered:
    854     return False
    855 
    856   return True
    857 
    858 
    859 def Error(filename, linenum, category, confidence, message):
    860   """Logs the fact we've found a lint error.
    861 
    862   We log where the error was found, and also our confidence in the error,
    863   that is, how certain we are this is a legitimate style regression, and
    864   not a misidentification or a use that's sometimes justified.
    865 
    866   False positives can be suppressed by the use of
    867   "cpplint(category)"  comments on the offending line.  These are
    868   parsed into _error_suppressions.
    869 
    870   Args:
    871     filename: The name of the file containing the error.
    872     linenum: The number of the line containing the error.
    873     category: A string used to describe the "category" this bug
    874       falls under: "whitespace", say, or "runtime".  Categories
    875       may have a hierarchy separated by slashes: "whitespace/indent".
    876     confidence: A number from 1-5 representing a confidence score for
    877       the error, with 5 meaning that we are certain of the problem,
    878       and 1 meaning that it could be a legitimate construct.
    879     message: The error message.
    880   """
    881   if _ShouldPrintError(category, confidence, linenum):
    882     _cpplint_state.IncrementErrorCount(category)
    883     if _cpplint_state.output_format == 'vs7':
    884       sys.stderr.write('%s(%s):  %s  [%s] [%d]\n' % (
    885           filename, linenum, message, category, confidence))
    886     elif _cpplint_state.output_format == 'eclipse':
    887       sys.stderr.write('%s:%s: warning: %s  [%s] [%d]\n' % (
    888           filename, linenum, message, category, confidence))
    889     else:
    890       sys.stderr.write('%s:%s:  %s  [%s] [%d]\n' % (
    891           filename, linenum, message, category, confidence))
    892 
    893 
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that open and close on the same line.
# This RE is a little bit more complicated than one might expect, because it
# also has to decide which neighbouring spaces to swallow, so that comments
# inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side,
# if this doesn't work we try on left side but only if there's a non-word
# character on the right.  The last alternative removes the bare comment.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
    914 
    915 
    916 def IsCppString(line):
    917   """Does line terminate so, that the next symbol is in string constant.
    918 
    919   This function does not consider single-line nor multi-line comments.
    920 
    921   Args:
    922     line: is a partial line of code starting from the 0..n.
    923 
    924   Returns:
    925     True, if next character appended to 'line' is inside a
    926     string constant.
    927   """
    928 
    929   line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
    930   return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
    931 
    932 
    933 def FindNextMultiLineCommentStart(lines, lineix):
    934   """Find the beginning marker for a multiline comment."""
    935   while lineix < len(lines):
    936     if lines[lineix].strip().startswith('/*'):
    937       # Only return this marker if the comment goes beyond this line
    938       if lines[lineix].strip().find('*/', 2) < 0:
    939         return lineix
    940     lineix += 1
    941   return len(lines)
    942 
    943 
    944 def FindNextMultiLineCommentEnd(lines, lineix):
    945   """We are inside a comment, find the end marker."""
    946   while lineix < len(lines):
    947     if lines[lineix].strip().endswith('*/'):
    948       return lineix
    949     lineix += 1
    950   return len(lines)
    951 
    952 
    953 def RemoveMultiLineCommentsFromRange(lines, begin, end):
    954   """Clears a range of lines for multi-line comments."""
    955   # Having // dummy comments makes the lines non-empty, so we will not get
    956   # unnecessary blank line warnings later in the code.
    957   for i in range(begin, end):
    958     lines[i] = '// dummy'
    959 
    960 
    961 def RemoveMultiLineComments(filename, lines, error):
    962   """Removes multiline (c-style) comments from lines."""
    963   lineix = 0
    964   while lineix < len(lines):
    965     lineix_begin = FindNextMultiLineCommentStart(lines, lineix)
    966     if lineix_begin >= len(lines):
    967       return
    968     lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin)
    969     if lineix_end >= len(lines):
    970       error(filename, lineix_begin + 1, 'readability/multiline_comment', 5,
    971             'Could not find end of multi-line comment')
    972       return
    973     RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1)
    974     lineix = lineix_end + 1
    975 
    976 
    977 def CleanseComments(line):
    978   """Removes //-comments and single-line C-style /* */ comments.
    979 
    980   Args:
    981     line: A line of C++ source.
    982 
    983   Returns:
    984     The line with single-line comments removed.
    985   """
    986   commentpos = line.find('//')
    987   if commentpos != -1 and not IsCppString(line[:commentpos]):
    988     line = line[:commentpos].rstrip()
    989   # get rid of /* ... */
    990   return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
    991 
    992 
    993 class CleansedLines(object):
    994   """Holds 3 copies of all lines with different preprocessing applied to them.
    995 
    996   1) elided member contains lines without strings and comments,
    997   2) lines member contains lines without comments, and
    998   3) raw_lines member contains all the lines without processing.
    999   All these three members are of <type 'list'>, and of the same length.
   1000   """
   1001 
   1002   def __init__(self, lines):
   1003     self.elided = []
   1004     self.lines = []
   1005     self.raw_lines = lines
   1006     self.num_lines = len(lines)
   1007     for linenum in range(len(lines)):
   1008       self.lines.append(CleanseComments(lines[linenum]))
   1009       elided = self._CollapseStrings(lines[linenum])
   1010       self.elided.append(CleanseComments(elided))
   1011 
   1012   def NumLines(self):
   1013     """Returns the number of lines represented."""
   1014     return self.num_lines
   1015 
   1016   @staticmethod
   1017   def _CollapseStrings(elided):
   1018     """Collapses strings and chars on a line to simple "" or '' blocks.
   1019 
   1020     We nix strings first so we're not fooled by text like '"http://"'
   1021 
   1022     Args:
   1023       elided: The line being processed.
   1024 
   1025     Returns:
   1026       The line with collapsed strings.
   1027     """
   1028     if not _RE_PATTERN_INCLUDE.match(elided):
   1029       # Remove escaped characters first to make quote/single quote collapsing
   1030       # basic.  Things that look like escaped characters shouldn't occur
   1031       # outside of strings and chars.
   1032       elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
   1033       elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
   1034       elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
   1035     return elided
   1036 
   1037 
   1038 def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar):
   1039   """Find the position just after the matching endchar.
   1040 
   1041   Args:
   1042     line: a CleansedLines line.
   1043     startpos: start searching at this position.
   1044     depth: nesting level at startpos.
   1045     startchar: expression opening character.
   1046     endchar: expression closing character.
   1047 
   1048   Returns:
   1049     Index just after endchar.
   1050   """
   1051   for i in xrange(startpos, len(line)):
   1052     if line[i] == startchar:
   1053       depth += 1
   1054     elif line[i] == endchar:
   1055       depth -= 1
   1056       if depth == 0:
   1057         return i + 1
   1058   return -1
   1059 
   1060 
   1061 def CloseExpression(clean_lines, linenum, pos):
   1062   """If input points to ( or { or [, finds the position that closes it.
   1063 
   1064   If lines[linenum][pos] points to a '(' or '{' or '[', finds the
   1065   linenum/pos that correspond to the closing of the expression.
   1066 
   1067   Args:
   1068     clean_lines: A CleansedLines instance containing the file.
   1069     linenum: The number of the line to check.
   1070     pos: A position on the line.
   1071 
   1072   Returns:
   1073     A tuple (line, linenum, pos) pointer *past* the closing brace, or
   1074     (line, len(lines), -1) if we never find a close.  Note we ignore
   1075     strings and comments when matching; and the line we return is the
   1076     'cleansed' line at linenum.
   1077   """
   1078 
   1079   line = clean_lines.elided[linenum]
   1080   startchar = line[pos]
   1081   if startchar not in '({[':
   1082     return (line, clean_lines.NumLines(), -1)
   1083   if startchar == '(': endchar = ')'
   1084   if startchar == '[': endchar = ']'
   1085   if startchar == '{': endchar = '}'
   1086 
   1087   # Check first line
   1088   end_pos = FindEndOfExpressionInLine(line, pos, 0, startchar, endchar)
   1089   if end_pos > -1:
   1090     return (line, linenum, end_pos)
   1091   tail = line[pos:]
   1092   num_open = tail.count(startchar) - tail.count(endchar)
   1093   while linenum < clean_lines.NumLines() - 1:
   1094     linenum += 1
   1095     line = clean_lines.elided[linenum]
   1096     delta = line.count(startchar) - line.count(endchar)
   1097     if num_open + delta <= 0:
   1098       return (line, linenum,
   1099               FindEndOfExpressionInLine(line, 0, num_open, startchar, endchar))
   1100     num_open += delta
   1101 
   1102   # Did not find endchar before end of file, give up
   1103   return (line, clean_lines.NumLines(), -1)
   1104 
   1105 def CheckForCopyright(filename, lines, error):
   1106   """Logs an error if no Copyright message appears at the top of the file."""
   1107 
   1108   # We'll say it should occur by line 10. Don't forget there's a
   1109   # dummy line at the front.
   1110   for line in xrange(1, min(len(lines), 11)):
   1111     if re.search(r'Copyright', lines[line], re.I): break
   1112   else:                       # means no copyright line was found
   1113     error(filename, 0, 'legal/copyright', 5,
   1114           'No copyright message found.  '
   1115           'You should have a line: "Copyright [year] <Copyright Owner>"')
   1116 
   1117 
   1118 def GetHeaderGuardCPPVariable(filename):
   1119   """Returns the CPP variable that should be used as a header guard.
   1120 
   1121   Args:
   1122     filename: The name of a C++ header file.
   1123 
   1124   Returns:
   1125     The CPP variable that should be used as a header guard in the
   1126     named file.
   1127 
   1128   """
   1129 
   1130   # Restores original filename in case that cpplint is invoked from Emacs's
   1131   # flymake.
   1132   filename = re.sub(r'_flymake\.h$', '.h', filename)
   1133   filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename)
   1134 
   1135   fileinfo = FileInfo(filename)
   1136   file_path_from_root = fileinfo.RepositoryName()
   1137   if _root:
   1138     file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root)
   1139   return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_'
   1140 
   1141 
   1142 def CheckForHeaderGuard(filename, lines, error):
   1143   """Checks that the file contains a header guard.
   1144 
   1145   Logs an error if no #ifndef header guard is present.  For other
   1146   headers, checks that the full pathname is used.
   1147 
   1148   Args:
   1149     filename: The name of the C++ header file.
   1150     lines: An array of strings, each representing a line of the file.
   1151     error: The function to call with any errors found.
   1152   """
   1153 
   1154   cppvar = GetHeaderGuardCPPVariable(filename)
   1155 
   1156   ifndef = None
   1157   ifndef_linenum = 0
   1158   define = None
   1159   endif = None
   1160   endif_linenum = 0
   1161   for linenum, line in enumerate(lines):
   1162     linesplit = line.split()
   1163     if len(linesplit) >= 2:
   1164       # find the first occurrence of #ifndef and #define, save arg
   1165       if not ifndef and linesplit[0] == '#ifndef':
   1166         # set ifndef to the header guard presented on the #ifndef line.
   1167         ifndef = linesplit[1]
   1168         ifndef_linenum = linenum
   1169       if not define and linesplit[0] == '#define':
   1170         define = linesplit[1]
   1171     # find the last occurrence of #endif, save entire line
   1172     if line.startswith('#endif'):
   1173       endif = line
   1174       endif_linenum = linenum
   1175 
   1176   if not ifndef:
   1177     error(filename, 0, 'build/header_guard', 5,
   1178           'No #ifndef header guard found, suggested CPP variable is: %s' %
   1179           cppvar)
   1180     return
   1181 
   1182   if not define:
   1183     error(filename, 0, 'build/header_guard', 5,
   1184           'No #define header guard found, suggested CPP variable is: %s' %
   1185           cppvar)
   1186     return
   1187 
   1188   # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__
   1189   # for backward compatibility.
   1190   if ifndef != cppvar:
   1191     error_level = 0
   1192     if ifndef != cppvar + '_':
   1193       error_level = 5
   1194 
   1195     ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum,
   1196                             error)
   1197     error(filename, ifndef_linenum, 'build/header_guard', error_level,
   1198           '#ifndef header guard has wrong style, please use: %s' % cppvar)
   1199 
   1200   if define != ifndef:
   1201     error(filename, 0, 'build/header_guard', 5,
   1202           '#ifndef and #define don\'t match, suggested CPP variable is: %s' %
   1203           cppvar)
   1204     return
   1205 
   1206   if endif != ('#endif  // %s' % cppvar):
   1207     error_level = 0
   1208     if endif != ('#endif  // %s' % (cppvar + '_')):
   1209       error_level = 5
   1210 
   1211     ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum,
   1212                             error)
   1213     error(filename, endif_linenum, 'build/header_guard', error_level,
   1214           '#endif line should be "#endif  // %s"' % cppvar)
   1215 
   1216 
   1217 def CheckForUnicodeReplacementCharacters(filename, lines, error):
   1218   """Logs an error for each line containing Unicode replacement characters.
   1219 
   1220   These indicate that either the file contained invalid UTF-8 (likely)
   1221   or Unicode replacement characters (which it shouldn't).  Note that
   1222   it's possible for this to throw off line numbering if the invalid
   1223   UTF-8 occurred adjacent to a newline.
   1224 
   1225   Args:
   1226     filename: The name of the current file.
   1227     lines: An array of strings, each representing a line of the file.
   1228     error: The function to call with any errors found.
   1229   """
   1230   for linenum, line in enumerate(lines):
   1231     if u'\ufffd' in line:
   1232       error(filename, linenum, 'readability/utf8', 5,
   1233             'Line contains invalid UTF-8 (or Unicode replacement character).')
   1234 
   1235 
   1236 def CheckForNewlineAtEOF(filename, lines, error):
   1237   """Logs an error if there is no newline char at the end of the file.
   1238 
   1239   Args:
   1240     filename: The name of the current file.
   1241     lines: An array of strings, each representing a line of the file.
   1242     error: The function to call with any errors found.
   1243   """
   1244 
   1245   # The array lines() was created by adding two newlines to the
   1246   # original file (go figure), then splitting on \n.
   1247   # To verify that the file ends in \n, we just have to make sure the
   1248   # last-but-two element of lines() exists and is empty.
   1249   if len(lines) < 3 or lines[-2]:
   1250     error(filename, len(lines) - 2, 'whitespace/ending_newline', 5,
   1251           'Could not find a newline character at the end of the file.')
   1252 
   1253 
   1254 def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error):
   1255   """Logs an error if we see /* ... */ or "..." that extend past one line.
   1256 
   1257   /* ... */ comments are legit inside macros, for one line.
   1258   Otherwise, we prefer // comments, so it's ok to warn about the
   1259   other.  Likewise, it's ok for strings to extend across multiple
   1260   lines, as long as a line continuation character (backslash)
   1261   terminates each line. Although not currently prohibited by the C++
   1262   style guide, it's ugly and unnecessary. We don't do well with either
   1263   in this lint program, so we warn about both.
   1264 
   1265   Args:
   1266     filename: The name of the current file.
   1267     clean_lines: A CleansedLines instance containing the file.
   1268     linenum: The number of the line to check.
   1269     error: The function to call with any errors found.
   1270   """
   1271   line = clean_lines.elided[linenum]
   1272 
   1273   # Remove all \\ (escaped backslashes) from the line. They are OK, and the
   1274   # second (escaped) slash may trigger later \" detection erroneously.
   1275   line = line.replace('\\\\', '')
   1276 
   1277   if line.count('/*') > line.count('*/'):
   1278     error(filename, linenum, 'readability/multiline_comment', 5,
   1279           'Complex multi-line /*...*/-style comment found. '
   1280           'Lint may give bogus warnings.  '
   1281           'Consider replacing these with //-style comments, '
   1282           'with #if 0...#endif, '
   1283           'or with more clearly structured multi-line comments.')
   1284 
   1285   if (line.count('"') - line.count('\\"')) % 2:
   1286     error(filename, linenum, 'readability/multiline_string', 5,
   1287           'Multi-line string ("...") found.  This lint script doesn\'t '
   1288           'do well with such strings, and may give bogus warnings.  They\'re '
   1289           'ugly and unnecessary, and you should use concatenation instead".')
   1290 
   1291 
   1292 threading_list = (
   1293     ('asctime(', 'asctime_r('),
   1294     ('ctime(', 'ctime_r('),
   1295     ('getgrgid(', 'getgrgid_r('),
   1296     ('getgrnam(', 'getgrnam_r('),
   1297     ('getlogin(', 'getlogin_r('),
   1298     ('getpwnam(', 'getpwnam_r('),
   1299     ('getpwuid(', 'getpwuid_r('),
   1300     ('gmtime(', 'gmtime_r('),
   1301     ('localtime(', 'localtime_r('),
   1302     ('rand(', 'rand_r('),
   1303     ('readdir(', 'readdir_r('),
   1304     ('strtok(', 'strtok_r('),
   1305     ('ttyname(', 'ttyname_r('),
   1306     )
   1307 
   1308 
   1309 def CheckPosixThreading(filename, clean_lines, linenum, error):
   1310   """Checks for calls to thread-unsafe functions.
   1311 
   1312   Much code has been originally written without consideration of
   1313   multi-threading. Also, engineers are relying on their old experience;
   1314   they have learned posix before threading extensions were added. These
   1315   tests guide the engineers to use thread-safe functions (when using
   1316   posix directly).
   1317 
   1318   Args:
   1319     filename: The name of the current file.
   1320     clean_lines: A CleansedLines instance containing the file.
   1321     linenum: The number of the line to check.
   1322     error: The function to call with any errors found.
   1323   """
   1324   line = clean_lines.elided[linenum]
   1325   for single_thread_function, multithread_safe_function in threading_list:
   1326     ix = line.find(single_thread_function)
   1327     # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   1328     if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and
   1329                                 line[ix - 1] not in ('_', '.', '>'))):
   1330       error(filename, linenum, 'runtime/threadsafe_fn', 2,
   1331             'Consider using ' + multithread_safe_function +
   1332             '...) instead of ' + single_thread_function +
   1333             '...) for improved thread safety.')
   1334 
   1335 
   1336 # Matches invalid increment: *count++, which moves pointer instead of
   1337 # incrementing a value.
   1338 _RE_PATTERN_INVALID_INCREMENT = re.compile(
   1339     r'^\s*\*\w+(\+\+|--);')
   1340 
   1341 
   1342 def CheckInvalidIncrement(filename, clean_lines, linenum, error):
   1343   """Checks for invalid increment *count++.
   1344 
   1345   For example following function:
   1346   void increment_counter(int* count) {
   1347     *count++;
   1348   }
   1349   is invalid, because it effectively does count++, moving pointer, and should
   1350   be replaced with ++*count, (*count)++ or *count += 1.
   1351 
   1352   Args:
   1353     filename: The name of the current file.
   1354     clean_lines: A CleansedLines instance containing the file.
   1355     linenum: The number of the line to check.
   1356     error: The function to call with any errors found.
   1357   """
   1358   line = clean_lines.elided[linenum]
   1359   if _RE_PATTERN_INVALID_INCREMENT.match(line):
   1360     error(filename, linenum, 'runtime/invalid_increment', 5,
   1361           'Changing pointer instead of value (or unused value of operator*).')
   1362 
   1363 
   1364 class _BlockInfo(object):
   1365   """Stores information about a generic block of code."""
   1366 
   1367   def __init__(self, seen_open_brace):
   1368     self.seen_open_brace = seen_open_brace
   1369     self.open_parentheses = 0
   1370     self.inline_asm = _NO_ASM
   1371 
   1372   def CheckBegin(self, filename, clean_lines, linenum, error):
   1373     """Run checks that applies to text up to the opening brace.
   1374 
   1375     This is mostly for checking the text after the class identifier
   1376     and the "{", usually where the base class is specified.  For other
   1377     blocks, there isn't much to check, so we always pass.
   1378 
   1379     Args:
   1380       filename: The name of the current file.
   1381       clean_lines: A CleansedLines instance containing the file.
   1382       linenum: The number of the line to check.
   1383       error: The function to call with any errors found.
   1384     """
   1385     pass
   1386 
   1387   def CheckEnd(self, filename, clean_lines, linenum, error):
   1388     """Run checks that applies to text after the closing brace.
   1389 
   1390     This is mostly used for checking end of namespace comments.
   1391 
   1392     Args:
   1393       filename: The name of the current file.
   1394       clean_lines: A CleansedLines instance containing the file.
   1395       linenum: The number of the line to check.
   1396       error: The function to call with any errors found.
   1397     """
   1398     pass
   1399 
   1400 
   1401 class _ClassInfo(_BlockInfo):
   1402   """Stores information about a class."""
   1403 
   1404   def __init__(self, name, class_or_struct, clean_lines, linenum):
   1405     _BlockInfo.__init__(self, False)
   1406     self.name = name
   1407     self.starting_linenum = linenum
   1408     self.is_derived = False
   1409     if class_or_struct == 'struct':
   1410       self.access = 'public'
   1411     else:
   1412       self.access = 'private'
   1413 
   1414     # Try to find the end of the class.  This will be confused by things like:
   1415     #   class A {
   1416     #   } *x = { ...
   1417     #
   1418     # But it's still good enough for CheckSectionSpacing.
   1419     self.last_line = 0
   1420     depth = 0
   1421     for i in range(linenum, clean_lines.NumLines()):
   1422       line = clean_lines.elided[i]
   1423       depth += line.count('{') - line.count('}')
   1424       if not depth:
   1425         self.last_line = i
   1426         break
   1427 
   1428   def CheckBegin(self, filename, clean_lines, linenum, error):
   1429     # Look for a bare ':'
   1430     if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]):
   1431       self.is_derived = True
   1432 
   1433 
   1434 class _NamespaceInfo(_BlockInfo):
   1435   """Stores information about a namespace."""
   1436 
   1437   def __init__(self, name, linenum):
   1438     _BlockInfo.__init__(self, False)
   1439     self.name = name or ''
   1440     self.starting_linenum = linenum
   1441 
   1442   def CheckEnd(self, filename, clean_lines, linenum, error):
   1443     """Check end of namespace comments."""
   1444     line = clean_lines.raw_lines[linenum]
   1445 
   1446     # Check how many lines is enclosed in this namespace.  Don't issue
   1447     # warning for missing namespace comments if there aren't enough
   1448     # lines.  However, do apply checks if there is already an end of
   1449     # namespace comment and it's incorrect.
   1450     #
   1451     # TODO(unknown): We always want to check end of namespace comments
   1452     # if a namespace is large, but sometimes we also want to apply the
   1453     # check if a short namespace contained nontrivial things (something
   1454     # other than forward declarations).  There is currently no logic on
   1455     # deciding what these nontrivial things are, so this check is
   1456     # triggered by namespace size only, which works most of the time.
   1457     if (linenum - self.starting_linenum < 10
   1458         and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)):
   1459       return
   1460 
   1461     # Look for matching comment at end of namespace.
   1462     #
   1463     # Note that we accept C style "/* */" comments for terminating
   1464     # namespaces, so that code that terminate namespaces inside
   1465     # preprocessor macros can be cpplint clean.  Example: http://go/nxpiz
   1466     #
   1467     # We also accept stuff like "// end of namespace <name>." with the
   1468     # period at the end.
   1469     #
   1470     # Besides these, we don't accept anything else, otherwise we might
   1471     # get false negatives when existing comment is a substring of the
   1472     # expected namespace.  Example: http://go/ldkdc, http://cl/23548205
   1473     if self.name:
   1474       # Named namespace
   1475       if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) +
   1476                     r'[\*/\.\\\s]*$'),
   1477                    line):
   1478         error(filename, linenum, 'readability/namespace', 5,
   1479               'Namespace should be terminated with "// namespace %s"' %
   1480               self.name)
   1481     else:
   1482       # Anonymous namespace
   1483       if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line):
   1484         error(filename, linenum, 'readability/namespace', 5,
   1485               'Namespace should be terminated with "// namespace"')
   1486 
   1487 
   1488 class _PreprocessorInfo(object):
   1489   """Stores checkpoints of nesting stacks when #if/#else is seen."""
   1490 
   1491   def __init__(self, stack_before_if):
   1492     # The entire nesting stack before #if
   1493     self.stack_before_if = stack_before_if
   1494 
   1495     # The entire nesting stack up to #else
   1496     self.stack_before_else = []
   1497 
   1498     # Whether we have already seen #else or #elif
   1499     self.seen_else = False
   1500 
   1501 
   1502 class _NestingState(object):
   1503   """Holds states related to parsing braces."""
   1504 
   1505   def __init__(self):
   1506     # Stack for tracking all braces.  An object is pushed whenever we
   1507     # see a "{", and popped when we see a "}".  Only 3 types of
   1508     # objects are possible:
   1509     # - _ClassInfo: a class or struct.
   1510     # - _NamespaceInfo: a namespace.
   1511     # - _BlockInfo: some other type of block.
   1512     self.stack = []
   1513 
   1514     # Stack of _PreprocessorInfo objects.
   1515     self.pp_stack = []
   1516 
   1517   def SeenOpenBrace(self):
   1518     """Check if we have seen the opening brace for the innermost block.
   1519 
   1520     Returns:
   1521       True if we have seen the opening brace, False if the innermost
   1522       block is still expecting an opening brace.
   1523     """
   1524     return (not self.stack) or self.stack[-1].seen_open_brace
   1525 
   1526   def InNamespaceBody(self):
   1527     """Check if we are currently one level inside a namespace body.
   1528 
   1529     Returns:
   1530       True if top of the stack is a namespace block, False otherwise.
   1531     """
   1532     return self.stack and isinstance(self.stack[-1], _NamespaceInfo)
   1533 
   1534   def UpdatePreprocessor(self, line):
   1535     """Update preprocessor stack.
   1536 
   1537     We need to handle preprocessors due to classes like this:
   1538       #ifdef SWIG
   1539       struct ResultDetailsPageElementExtensionPoint {
   1540       #else
   1541       struct ResultDetailsPageElementExtensionPoint : public Extension {
   1542       #endif
   1543     (see http://go/qwddn for original example)
   1544 
   1545     We make the following assumptions (good enough for most files):
   1546     - Preprocessor condition evaluates to true from #if up to first
   1547       #else/#elif/#endif.
   1548 
   1549     - Preprocessor condition evaluates to false from #else/#elif up
   1550       to #endif.  We still perform lint checks on these lines, but
   1551       these do not affect nesting stack.
   1552 
   1553     Args:
   1554       line: current line to check.
   1555     """
   1556     if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
   1557       # Beginning of #if block, save the nesting stack here.  The saved
   1558       # stack will allow us to restore the parsing state in the #else case.
   1559       self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
   1560     elif Match(r'^\s*#\s*(else|elif)\b', line):
   1561       # Beginning of #else block
   1562       if self.pp_stack:
   1563         if not self.pp_stack[-1].seen_else:
   1564           # This is the first #else or #elif block.  Remember the
   1565           # whole nesting stack up to this point.  This is what we
   1566           # keep after the #endif.
   1567           self.pp_stack[-1].seen_else = True
   1568           self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)
   1569 
   1570         # Restore the stack to how it was before the #if
   1571         self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
   1572       else:
   1573         # TODO(unknown): unexpected #else, issue warning?
   1574         pass
   1575     elif Match(r'^\s*#\s*endif\b', line):
   1576       # End of #if or #else blocks.
   1577       if self.pp_stack:
   1578         # If we saw an #else, we will need to restore the nesting
   1579         # stack to its former state before the #else, otherwise we
   1580         # will just continue from where we left off.
   1581         if self.pp_stack[-1].seen_else:
   1582           # Here we can just use a shallow copy since we are the last
   1583           # reference to it.
   1584           self.stack = self.pp_stack[-1].stack_before_else
   1585         # Drop the corresponding #if
   1586         self.pp_stack.pop()
   1587       else:
   1588         # TODO(unknown): unexpected #endif, issue warning?
   1589         pass
   1590 
   1591   def Update(self, filename, clean_lines, linenum, error):
   1592     """Update nesting state with current line.
   1593 
   1594     Args:
   1595       filename: The name of the current file.
   1596       clean_lines: A CleansedLines instance containing the file.
   1597       linenum: The number of the line to check.
   1598       error: The function to call with any errors found.
   1599     """
   1600     line = clean_lines.elided[linenum]
   1601 
   1602     # Update pp_stack first
   1603     self.UpdatePreprocessor(line)
   1604 
   1605     # Count parentheses.  This is to avoid adding struct arguments to
   1606     # the nesting stack.
   1607     if self.stack:
   1608       inner_block = self.stack[-1]
   1609       depth_change = line.count('(') - line.count(')')
   1610       inner_block.open_parentheses += depth_change
   1611 
   1612       # Also check if we are starting or ending an inline assembly block.
   1613       if inner_block.inline_asm in (_NO_ASM, _END_ASM):
   1614         if (depth_change != 0 and
   1615             inner_block.open_parentheses == 1 and
   1616             _MATCH_ASM.match(line)):
   1617           # Enter assembly block
   1618           inner_block.inline_asm = _INSIDE_ASM
   1619         else:
   1620           # Not entering assembly block.  If previous line was _END_ASM,
   1621           # we will now shift to _NO_ASM state.
   1622           inner_block.inline_asm = _NO_ASM
   1623       elif (inner_block.inline_asm == _INSIDE_ASM and
   1624             inner_block.open_parentheses == 0):
   1625         # Exit assembly block
   1626         inner_block.inline_asm = _END_ASM
   1627 
   1628     # Consume namespace declaration at the beginning of the line.  Do
   1629     # this in a loop so that we catch same line declarations like this:
   1630     #   namespace proto2 { namespace bridge { class MessageSet; } }
   1631     while True:
   1632       # Match start of namespace.  The "\b\s*" below catches namespace
   1633       # declarations even if it weren't followed by a whitespace, this
   1634       # is so that we don't confuse our namespace checker.  The
   1635       # missing spaces will be flagged by CheckSpacing.
   1636       namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
   1637       if not namespace_decl_match:
   1638         break
   1639 
   1640       new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
   1641       self.stack.append(new_namespace)
   1642 
   1643       line = namespace_decl_match.group(2)
   1644       if line.find('{') != -1:
   1645         new_namespace.seen_open_brace = True
   1646         line = line[line.find('{') + 1:]
   1647 
   1648     # Look for a class declaration in whatever is left of the line
   1649     # after parsing namespaces.  The regexp accounts for decorated classes
   1650     # such as in:
   1651     #   class LOCKABLE API Object {
   1652     #   };
   1653     #
   1654     # Templates with class arguments may confuse the parser, for example:
   1655     #   template <class T
   1656     #             class Comparator = less<T>,
   1657     #             class Vector = vector<T> >
   1658     #   class HeapQueue {
   1659     #
   1660     # Because this parser has no nesting state about templates, by the
   1661     # time it saw "class Comparator", it may think that it's a new class.
   1662     # Nested templates have a similar problem:
   1663     #   template <
   1664     #       typename ExportedType,
   1665     #       typename TupleType,
   1666     #       template <typename, typename> class ImplTemplate>
   1667     #
   1668     # To avoid these cases, we ignore classes that are followed by '=' or '>'
   1669     class_decl_match = Match(
   1670         r'\s*(template\s*<[\w\s<>,:]*>\s*)?'
   1671         '(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)'
   1672         '(([^=>]|<[^<>]*>)*)$', line)
   1673     if (class_decl_match and
   1674         (not self.stack or self.stack[-1].open_parentheses == 0)):
   1675       self.stack.append(_ClassInfo(
   1676           class_decl_match.group(4), class_decl_match.group(2),
   1677           clean_lines, linenum))
   1678       line = class_decl_match.group(5)
   1679 
   1680     # If we have not yet seen the opening brace for the innermost block,
   1681     # run checks here.
   1682     if not self.SeenOpenBrace():
   1683       self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
   1684 
   1685     # Update access control if we are inside a class/struct
   1686     if self.stack and isinstance(self.stack[-1], _ClassInfo):
   1687       access_match = Match(r'\s*(public|private|protected)\s*:', line)
   1688       if access_match:
   1689         self.stack[-1].access = access_match.group(1)
   1690 
   1691     # Consume braces or semicolons from what's left of the line
   1692     while True:
   1693       # Match first brace, semicolon, or closed parenthesis.
   1694       matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
   1695       if not matched:
   1696         break
   1697 
   1698       token = matched.group(1)
   1699       if token == '{':
   1700         # If namespace or class hasn't seen a opening brace yet, mark
   1701         # namespace/class head as complete.  Push a new block onto the
   1702         # stack otherwise.
   1703         if not self.SeenOpenBrace():
   1704           self.stack[-1].seen_open_brace = True
   1705         else:
   1706           self.stack.append(_BlockInfo(True))
   1707           if _MATCH_ASM.match(line):
   1708             self.stack[-1].inline_asm = _BLOCK_ASM
   1709       elif token == ';' or token == ')':
   1710         # If we haven't seen an opening brace yet, but we already saw
   1711         # a semicolon, this is probably a forward declaration.  Pop
   1712         # the stack for these.
   1713         #
   1714         # Similarly, if we haven't seen an opening brace yet, but we
   1715         # already saw a closing parenthesis, then these are probably
   1716         # function arguments with extra "class" or "struct" keywords.
   1717         # Also pop these stack for these.
   1718         if not self.SeenOpenBrace():
   1719           self.stack.pop()
   1720       else:  # token == '}'
   1721         # Perform end of block checks and pop the stack.
   1722         if self.stack:
   1723           self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
   1724           self.stack.pop()
   1725       line = matched.group(2)
   1726 
   1727   def InnermostClass(self):
   1728     """Get class info on the top of the stack.
   1729 
   1730     Returns:
   1731       A _ClassInfo object if we are inside a class, or None otherwise.
   1732     """
   1733     for i in range(len(self.stack), 0, -1):
   1734       classinfo = self.stack[i - 1]
   1735       if isinstance(classinfo, _ClassInfo):
   1736         return classinfo
   1737     return None
   1738 
   1739   def CheckClassFinished(self, filename, error):
   1740     """Checks that all classes have been completely parsed.
   1741 
   1742     Call this when all lines in a file have been processed.
   1743     Args:
   1744       filename: The name of the current file.
   1745       error: The function to call with any errors found.
   1746     """
   1747     # Note: This test can result in false positives if #ifdef constructs
   1748     # get in the way of brace matching. See the testBuildClass test in
   1749     # cpplint_unittest.py for an example of this.
   1750     for obj in self.stack:
   1751       if isinstance(obj, _ClassInfo):
   1752         error(filename, obj.starting_linenum, 'build/class', 5,
   1753               'Failed to find complete declaration of class %s' %
   1754               obj.name)
   1755 
   1756 
   1757 def CheckForNonStandardConstructs(filename, clean_lines, linenum,
   1758                                   nesting_state, error):
   1759   """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
   1760 
   1761   Complain about several constructs which gcc-2 accepts, but which are
   1762   not standard C++.  Warning about these in lint is one way to ease the
   1763   transition to new compilers.
   1764   - put storage class first (e.g. "static const" instead of "const static").
   1765   - "%lld" instead of %qd" in printf-type functions.
   1766   - "%1$d" is non-standard in printf-type functions.
   1767   - "\%" is an undefined character escape sequence.
   1768   - text after #endif is not allowed.
   1769   - invalid inner-style forward declaration.
   1770   - >? and <? operators, and their >?= and <?= cousins.
   1771 
   1772   Additionally, check for constructor/destructor style violations and reference
   1773   members, as it is very convenient to do so while checking for
   1774   gcc-2 compliance.
   1775 
   1776   Args:
   1777     filename: The name of the current file.
   1778     clean_lines: A CleansedLines instance containing the file.
   1779     linenum: The number of the line to check.
   1780     nesting_state: A _NestingState instance which maintains information about
   1781                    the current stack of nested blocks being parsed.
   1782     error: A callable to which errors are reported, which takes 4 arguments:
   1783            filename, line number, error level, and message
   1784   """
   1785 
   1786   # Remove comments from the line, but leave in strings for now.
   1787   line = clean_lines.lines[linenum]
   1788 
   1789   if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
   1790     error(filename, linenum, 'runtime/printf_format', 3,
   1791           '%q in format strings is deprecated.  Use %ll instead.')
   1792 
   1793   if Search(r'printf\s*\(.*".*%\d+\$', line):
   1794     error(filename, linenum, 'runtime/printf_format', 2,
   1795           '%N$ formats are unconventional.  Try rewriting to avoid them.')
   1796 
   1797   # Remove escaped backslashes before looking for undefined escapes.
   1798   line = line.replace('\\\\', '')
   1799 
   1800   if Search(r'("|\').*\\(%|\[|\(|{)', line):
   1801     error(filename, linenum, 'build/printf_format', 3,
   1802           '%, [, (, and { are undefined character escapes.  Unescape them.')
   1803 
   1804   # For the rest, work with both comments and strings removed.
   1805   line = clean_lines.elided[linenum]
   1806 
   1807   if Search(r'\b(const|volatile|void|char|short|int|long'
   1808             r'|float|double|signed|unsigned'
   1809             r'|schar|u?int8|u?int16|u?int32|u?int64)'
   1810             r'\s+(register|static|extern|typedef)\b',
   1811             line):
   1812     error(filename, linenum, 'build/storage_class', 5,
   1813           'Storage class (static, extern, typedef, etc) should be first.')
   1814 
   1815   if Match(r'\s*#\s*endif\s*[^/\s]+', line):
   1816     error(filename, linenum, 'build/endif_comment', 5,
   1817           'Uncommented text after #endif is non-standard.  Use a comment.')
   1818 
   1819   if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
   1820     error(filename, linenum, 'build/forward_decl', 5,
   1821           'Inner-style forward declarations are invalid.  Remove this line.')
   1822 
   1823   if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
   1824             line):
   1825     error(filename, linenum, 'build/deprecated', 3,
   1826           '>? and <? (max and min) operators are non-standard and deprecated.')
   1827 
   1828   if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
   1829     # TODO(unknown): Could it be expanded safely to arbitrary references,
   1830     # without triggering too many false positives? The first
   1831     # attempt triggered 5 warnings for mostly benign code in the regtest, hence
   1832     # the restriction.
   1833     # Here's the original regexp, for the reference:
   1834     # type_name = r'\w+((\s*::\s*\w+)|(\s*<\s*\w+?\s*>))?'
   1835     # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
   1836     error(filename, linenum, 'runtime/member_string_references', 2,
   1837           'const string& members are dangerous. It is much better to use '
   1838           'alternatives, such as pointers or simple constants.')
   1839 
   1840   # Everything else in this function operates on class declarations.
   1841   # Return early if the top of the nesting stack is not a class, or if
   1842   # the class head is not completed yet.
   1843   classinfo = nesting_state.InnermostClass()
   1844   if not classinfo or not classinfo.seen_open_brace:
   1845     return
   1846 
   1847   # The class may have been declared with namespace or classname qualifiers.
   1848   # The constructor and destructor will not have those qualifiers.
   1849   base_classname = classinfo.name.split('::')[-1]
   1850 
   1851   # Look for single-argument constructors that aren't marked explicit.
   1852   # Technically a valid construct, but against style.
   1853   args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)'
   1854                % re.escape(base_classname),
   1855                line)
   1856   if (args and
   1857       args.group(1) != 'void' and
   1858       not Match(r'(const\s+)?%s\s*(?:<\w+>\s*)?&' % re.escape(base_classname),
   1859                 args.group(1).strip())):
   1860     error(filename, linenum, 'runtime/explicit', 5,
   1861           'Single-argument constructors should be marked explicit.')
   1862 
   1863 
   1864 def CheckSpacingForFunctionCall(filename, line, linenum, error):
   1865   """Checks for the correctness of various spacing around function calls.
   1866 
   1867   Args:
   1868     filename: The name of the current file.
   1869     line: The text of the line to check.
   1870     linenum: The number of the line to check.
   1871     error: The function to call with any errors found.
   1872   """
   1873 
   1874   # Since function calls often occur inside if/for/while/switch
   1875   # expressions - which have their own, more liberal conventions - we
   1876   # first see if we should be looking inside such an expression for a
   1877   # function call, to which we can apply more strict standards.
   1878   fncall = line    # if there's no control flow construct, look at whole line
   1879   for pattern in (r'\bif\s*\((.*)\)\s*{',
   1880                   r'\bfor\s*\((.*)\)\s*{',
   1881                   r'\bwhile\s*\((.*)\)\s*[{;]',
   1882                   r'\bswitch\s*\((.*)\)\s*{'):
   1883     match = Search(pattern, line)
   1884     if match:
   1885       fncall = match.group(1)    # look inside the parens for function calls
   1886       break
   1887 
   1888   # Except in if/for/while/switch, there should never be space
   1889   # immediately inside parens (eg "f( 3, 4 )").  We make an exception
   1890   # for nested parens ( (a+b) + c ).  Likewise, there should never be
   1891   # a space before a ( when it's a function argument.  I assume it's a
   1892   # function argument when the char before the whitespace is legal in
   1893   # a function name (alnum + _) and we're not starting a macro. Also ignore
   1894   # pointers and references to arrays and functions coz they're too tricky:
   1895   # we use a very simple way to recognize these:
   1896   # " (something)(maybe-something)" or
   1897   # " (something)(maybe-something," or
   1898   # " (something)[something]"
   1899   # Note that we assume the contents of [] to be short enough that
   1900   # they'll never need to wrap.
   1901   if (  # Ignore control structures.
   1902       # BEGIN android-changed
   1903       # not Search(r'\b(if|for|while|switch|return|delete)\b', fncall) and
   1904       not Search(r'\b(if|for|while|switch|return|delete|new)\b', fncall) and
   1905       # END android-changed
   1906       # Ignore pointers/references to functions.
   1907       not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and
   1908       # Ignore pointers/references to arrays.
   1909       not Search(r' \([^)]+\)\[[^\]]+\]', fncall)):
   1910     if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):      # a ( used for a fn call
   1911       error(filename, linenum, 'whitespace/parens', 4,
   1912             'Extra space after ( in function call')
   1913     elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
   1914       error(filename, linenum, 'whitespace/parens', 2,
   1915             'Extra space after (')
   1916     if (Search(r'\w\s+\(', fncall) and
   1917         not Search(r'#\s*define|typedef', fncall) and
   1918         not Search(r'\w\s+\((\w+::)?\*\w+\)\(', fncall)):
   1919       error(filename, linenum, 'whitespace/parens', 4,
   1920             'Extra space before ( in function call')
   1921     # If the ) is followed only by a newline or a { + newline, assume it's
   1922     # part of a control statement (if/while/etc), and don't complain
   1923     if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
   1924       # If the closing parenthesis is preceded by only whitespaces,
   1925       # try to give a more descriptive error message.
   1926       if Search(r'^\s+\)', fncall):
   1927         error(filename, linenum, 'whitespace/parens', 2,
   1928               'Closing ) should be moved to the previous line')
   1929       else:
   1930         error(filename, linenum, 'whitespace/parens', 2,
   1931               'Extra space before )')
   1932 
   1933 
   1934 def IsBlankLine(line):
   1935   """Returns true if the given line is blank.
   1936 
   1937   We consider a line to be blank if the line is empty or consists of
   1938   only white spaces.
   1939 
   1940   Args:
   1941     line: A line of a string.
   1942 
   1943   Returns:
   1944     True, if the given line is blank.
   1945   """
   1946   return not line or line.isspace()
   1947 
   1948 
   1949 def CheckForFunctionLengths(filename, clean_lines, linenum,
   1950                             function_state, error):
   1951   """Reports for long function bodies.
   1952 
   1953   For an overview why this is done, see:
   1954   http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
   1955 
   1956   Uses a simplistic algorithm assuming other style guidelines
   1957   (especially spacing) are followed.
   1958   Only checks unindented functions, so class members are unchecked.
   1959   Trivial bodies are unchecked, so constructors with huge initializer lists
   1960   may be missed.
   1961   Blank/comment lines are not counted so as to avoid encouraging the removal
   1962   of vertical space and comments just to get through a lint check.
   1963   NOLINT *on the last line of a function* disables this check.
   1964 
   1965   Args:
   1966     filename: The name of the current file.
   1967     clean_lines: A CleansedLines instance containing the file.
   1968     linenum: The number of the line to check.
   1969     function_state: Current function name and lines in body so far.
   1970     error: The function to call with any errors found.
   1971   """
   1972   lines = clean_lines.lines
   1973   line = lines[linenum]
   1974   raw = clean_lines.raw_lines
   1975   raw_line = raw[linenum]
   1976   joined_line = ''
   1977 
   1978   starting_func = False
   1979   regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
   1980   match_result = Match(regexp, line)
   1981   if match_result:
   1982     # If the name is all caps and underscores, figure it's a macro and
   1983     # ignore it, unless it's TEST or TEST_F.
   1984     function_name = match_result.group(1).split()[-1]
   1985     if function_name == 'TEST' or function_name == 'TEST_F' or (
   1986         not Match(r'[A-Z_]+$', function_name)):
   1987       starting_func = True
   1988 
   1989   if starting_func:
   1990     body_found = False
   1991     for start_linenum in xrange(linenum, clean_lines.NumLines()):
   1992       start_line = lines[start_linenum]
   1993       joined_line += ' ' + start_line.lstrip()
   1994       if Search(r'(;|})', start_line):  # Declarations and trivial functions
   1995         body_found = True
   1996         break                              # ... ignore
   1997       elif Search(r'{', start_line):
   1998         body_found = True
   1999         function = Search(r'((\w|:)*)\(', line).group(1)
   2000         if Match(r'TEST', function):    # Handle TEST... macros
   2001           parameter_regexp = Search(r'(\(.*\))', joined_line)
   2002           if parameter_regexp:             # Ignore bad syntax
   2003             function += parameter_regexp.group(1)
   2004         else:
   2005           function += '()'
   2006         function_state.Begin(function)
   2007         break
   2008     if not body_found:
   2009       # No body for the function (or evidence of a non-function) was found.
   2010       error(filename, linenum, 'readability/fn_size', 5,
   2011             'Lint failed to find start of function body.')
   2012   elif Match(r'^\}\s*$', line):  # function end
   2013     function_state.Check(error, filename, linenum)
   2014     function_state.End()
   2015   elif not Match(r'^\s*$', line):
   2016     function_state.Count()  # Count non-blank/non-comment lines.
   2017 
   2018 
   2019 _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')
   2020 
   2021 
   2022 def CheckComment(comment, filename, linenum, error):
   2023   """Checks for common mistakes in TODO comments.
   2024 
   2025   Args:
   2026     comment: The text of the comment from the line in question.
   2027     filename: The name of the current file.
   2028     linenum: The number of the line to check.
   2029     error: The function to call with any errors found.
   2030   """
   2031   match = _RE_PATTERN_TODO.match(comment)
   2032   if match:
   2033     # One whitespace is correct; zero whitespace is handled elsewhere.
   2034     leading_whitespace = match.group(1)
   2035     if len(leading_whitespace) > 1:
   2036       error(filename, linenum, 'whitespace/todo', 2,
   2037             'Too many spaces before TODO')
   2038 
   2039     username = match.group(2)
   2040     if not username:
   2041       error(filename, linenum, 'readability/todo', 2,
   2042             'Missing username in TODO; it should look like '
   2043             '"// TODO(my_username): Stuff."')
   2044 
   2045     middle_whitespace = match.group(3)
   2046     # Comparisons made explicit for correctness -- pylint: disable-msg=C6403
   2047     if middle_whitespace != ' ' and middle_whitespace != '':
   2048       error(filename, linenum, 'whitespace/todo', 2,
   2049             'TODO(my_username) should be followed by a space')
   2050 
   2051 def CheckAccess(filename, clean_lines, linenum, nesting_state, error):
   2052   """Checks for improper use of DISALLOW* macros.
   2053 
   2054   Args:
   2055     filename: The name of the current file.
   2056     clean_lines: A CleansedLines instance containing the file.
   2057     linenum: The number of the line to check.
   2058     nesting_state: A _NestingState instance which maintains information about
   2059                    the current stack of nested blocks being parsed.
   2060     error: The function to call with any errors found.
   2061   """
   2062   line = clean_lines.elided[linenum]  # get rid of comments and strings
   2063 
   2064   matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|'
   2065                    r'DISALLOW_EVIL_CONSTRUCTORS|'
   2066                    r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line)
   2067   if not matched:
   2068     return
   2069   if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo):
   2070     if nesting_state.stack[-1].access != 'private':
   2071       error(filename, linenum, 'readability/constructors', 3,
   2072             '%s must be in the private: section' % matched.group(1))
   2073 
   2074   else:
   2075     # Found DISALLOW* macro outside a class declaration, or perhaps it
   2076     # was used inside a function when it should have been part of the
   2077     # class declaration.  We could issue a warning here, but it
   2078     # probably resulted in a compiler error already.
   2079     pass
   2080 
   2081 
   2082 def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix):
   2083   """Find the corresponding > to close a template.
   2084 
   2085   Args:
   2086     clean_lines: A CleansedLines instance containing the file.
   2087     linenum: Current line number.
   2088     init_suffix: Remainder of the current line after the initial <.
   2089 
   2090   Returns:
   2091     True if a matching bracket exists.
   2092   """
   2093   line = init_suffix
   2094   nesting_stack = ['<']
   2095   while True:
   2096     # Find the next operator that can tell us whether < is used as an
   2097     # opening bracket or as a less-than operator.  We only want to
   2098     # warn on the latter case.
   2099     #
   2100     # We could also check all other operators and terminate the search
   2101     # early, e.g. if we got something like this "a<b+c", the "<" is
   2102     # most likely a less-than operator, but then we will get false
   2103     # positives for default arguments (e.g. http://go/prccd) and
   2104     # other template expressions (e.g. http://go/oxcjq).
   2105     match = Search(r'^[^<>(),;\[\]]*([<>(),;\[\]])(.*)$', line)
   2106     if match:
   2107       # Found an operator, update nesting stack
   2108       operator = match.group(1)
   2109       line = match.group(2)
   2110 
   2111       if nesting_stack[-1] == '<':
   2112         # Expecting closing angle bracket
   2113         if operator in ('<', '(', '['):
   2114           nesting_stack.append(operator)
   2115         elif operator == '>':
   2116           nesting_stack.pop()
   2117           if not nesting_stack:
   2118             # Found matching angle bracket
   2119             return True
   2120         elif operator == ',':
   2121           # Got a comma after a bracket, this is most likely a template
   2122           # argument.  We have not seen a closing angle bracket yet, but
   2123           # it's probably a few lines later if we look for it, so just
   2124           # return early here.
   2125           return True
   2126         else:
   2127           # Got some other operator.
   2128           return False
   2129 
   2130       else:
   2131         # Expecting closing parenthesis or closing bracket
   2132         if operator in ('<', '(', '['):
   2133           nesting_stack.append(operator)
   2134         elif operator in (')', ']'):
   2135           # We don't bother checking for matching () or [].  If we got
   2136           # something like (] or [), it would have been a syntax error.
   2137           nesting_stack.pop()
   2138 
   2139     else:
   2140       # Scan the next line
   2141       linenum += 1
   2142       if linenum >= len(clean_lines.elided):
   2143         break
   2144       line = clean_lines.elided[linenum]
   2145 
   2146   # Exhausted all remaining lines and still no matching angle bracket.
   2147   # Most likely the input was incomplete, otherwise we should have
   2148   # seen a semicolon and returned early.
   2149   return True
   2150 
   2151 
   2152 def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix):
   2153   """Find the corresponding < that started a template.
   2154 
   2155   Args:
   2156     clean_lines: A CleansedLines instance containing the file.
   2157     linenum: Current line number.
   2158     init_prefix: Part of the current line before the initial >.
   2159 
   2160   Returns:
   2161     True if a matching bracket exists.
   2162   """
   2163   line = init_prefix
   2164   nesting_stack = ['>']
   2165   while True:
   2166     # Find the previous operator
   2167     match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line)
   2168     if match:
   2169       # Found an operator, update nesting stack
   2170       operator = match.group(2)
   2171       line = match.group(1)
   2172 
   2173       if nesting_stack[-1] == '>':
   2174         # Expecting opening angle bracket
   2175         if operator in ('>', ')', ']'):
   2176           nesting_stack.append(operator)
   2177         elif operator == '<':
   2178           nesting_stack.pop()
   2179           if not nesting_stack:
   2180             # Found matching angle bracket
   2181             return True
   2182         elif operator == ',':
   2183           # Got a comma before a bracket, this is most likely a
   2184           # template argument.  The opening angle bracket is probably
   2185           # there if we look for it, so just return early here.
   2186           return True
   2187         else:
   2188           # Got some other operator.
   2189           return False
   2190 
   2191       else:
   2192         # Expecting opening parenthesis or opening bracket
   2193         if operator in ('>', ')', ']'):
   2194           nesting_stack.append(operator)
   2195         elif operator in ('(', '['):
   2196           nesting_stack.pop()
   2197 
   2198     else:
   2199       # Scan the previous line
   2200       linenum -= 1
   2201       if linenum < 0:
   2202         break
   2203       line = clean_lines.elided[linenum]
   2204 
   2205   # Exhausted all earlier lines and still no matching angle bracket.
   2206   return False
   2207 
   2208 
   2209 def CheckSpacing(filename, clean_lines, linenum, nesting_state, error):
   2210   """Checks for the correctness of various spacing issues in the code.
   2211 
   2212   Things we check for: spaces around operators, spaces after
   2213   if/for/while/switch, no spaces around parens in function calls, two
   2214   spaces between code and comment, don't start a block with a blank
   2215   line, don't end a function with a blank line, don't add a blank line
   2216   after public/protected/private, don't have too many blank lines in a row.
   2217 
   2218   Args:
   2219     filename: The name of the current file.
   2220     clean_lines: A CleansedLines instance containing the file.
   2221     linenum: The number of the line to check.
   2222     nesting_state: A _NestingState instance which maintains information about
   2223                    the current stack of nested blocks being parsed.
   2224     error: The function to call with any errors found.
   2225   """
   2226 
   2227   raw = clean_lines.raw_lines
   2228   line = raw[linenum]
   2229 
   2230   # Before nixing comments, check if the line is blank for no good
   2231   # reason.  This includes the first line after a block is opened, and
   2232   # blank lines at the end of a function (ie, right before a line like '}'
   2233   #
   2234   # Skip all the blank line checks if we are immediately inside a
   2235   # namespace body.  In other words, don't issue blank line warnings
   2236   # for this block:
   2237   #   namespace {
   2238   #
   2239   #   }
   2240   #
   2241   # A warning about missing end of namespace comments will be issued instead.
   2242   if IsBlankLine(line) and not nesting_state.InNamespaceBody():
   2243     elided = clean_lines.elided
   2244     prev_line = elided[linenum - 1]
   2245     prevbrace = prev_line.rfind('{')
   2246     # TODO(unknown): Don't complain if line before blank line, and line after,
   2247     #                both start with alnums and are indented the same amount.
   2248     #                This ignores whitespace at the start of a namespace block
   2249     #                because those are not usually indented.
   2250     if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1:
   2251       # OK, we have a blank line at the start of a code block.  Before we
   2252       # complain, we check if it is an exception to the rule: The previous
   2253       # non-empty line has the parameters of a function header that are indented
   2254       # 4 spaces (because they did not fit in a 80 column line when placed on
   2255       # the same line as the function name).  We also check for the case where
   2256       # the previous line is indented 6 spaces, which may happen when the
   2257       # initializers of a constructor do not fit into a 80 column line.
   2258       exception = False
   2259       if Match(r' {6}\w', prev_line):  # Initializer list?
   2260         # We are looking for the opening column of initializer list, which
   2261         # should be indented 4 spaces to cause 6 space indentation afterwards.
   2262         search_position = linenum-2
   2263         while (search_position >= 0
   2264                and Match(r' {6}\w', elided[search_position])):
   2265           search_position -= 1
   2266         exception = (search_position >= 0
   2267                      and elided[search_position][:5] == '    :')
   2268       else:
   2269         # Search for the function arguments or an initializer list.  We use a
   2270         # simple heuristic here: If the line is indented 4 spaces; and we have a
   2271         # closing paren, without the opening paren, followed by an opening brace
   2272         # or colon (for initializer lists) we assume that it is the last line of
   2273         # a function header.  If we have a colon indented 4 spaces, it is an
   2274         # initializer list.
   2275         exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
   2276                            prev_line)
   2277                      or Match(r' {4}:', prev_line))
   2278 
   2279       if not exception:
   2280         error(filename, linenum, 'whitespace/blank_line', 2,
   2281               'Blank line at the start of a code block.  Is this needed?')
   2282     # Ignore blank lines at the end of a block in a long if-else
   2283     # chain, like this:
   2284     #   if (condition1) {
   2285     #     // Something followed by a blank line
   2286     #
   2287     #   } else if (condition2) {
   2288     #     // Something else
   2289     #   }
   2290     if linenum + 1 < clean_lines.NumLines():
   2291       next_line = raw[linenum + 1]
   2292       if (next_line
   2293           and Match(r'\s*}', next_line)
   2294           and next_line.find('} else ') == -1):
   2295         error(filename, linenum, 'whitespace/blank_line', 3,
   2296               'Blank line at the end of a code block.  Is this needed?')
   2297 
   2298     matched = Match(r'\s*(public|protected|private):', prev_line)
   2299     if matched:
   2300       error(filename, linenum, 'whitespace/blank_line', 3,
   2301             'Do not leave a blank line after "%s:"' % matched.group(1))
   2302 
   2303   # Next, we complain if there's a comment too near the text
   2304   commentpos = line.find('//')
   2305   if commentpos != -1:
   2306     # Check if the // may be in quotes.  If so, ignore it
   2307     # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
   2308     if (line.count('"', 0, commentpos) -
   2309         line.count('\\"', 0, commentpos)) % 2 == 0:   # not in quotes
   2310       # Allow one space for new scopes, two spaces otherwise:
   2311       if (not Match(r'^\s*{ //', line) and
   2312           ((commentpos >= 1 and
   2313             line[commentpos-1] not in string.whitespace) or
   2314            (commentpos >= 2 and
   2315             line[commentpos-2] not in string.whitespace))):
   2316         error(filename, linenum, 'whitespace/comments', 2,
   2317               'At least two spaces is best between code and comments')
   2318       # There should always be a space between the // and the comment
   2319       commentend = commentpos + 2
   2320       if commentend < len(line) and not line[commentend] == ' ':
   2321         # but some lines are exceptions -- e.g. if they're big
   2322         # comment delimiters like:
   2323         # //----------------------------------------------------------
   2324         # or are an empty C++ style Doxygen comment, like:
   2325         # ///
   2326         # or they begin with multiple slashes followed by a space:
   2327         # //////// Header comment
   2328         match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or
   2329                  Search(r'^/$', line[commentend:]) or
   2330                  Search(r'^/+ ', line[commentend:]))
   2331         if not match:
   2332           error(filename, linenum, 'whitespace/comments', 4,
   2333                 'Should have a space between // and comment')
   2334       CheckComment(line[commentpos:], filename, linenum, error)
   2335 
   2336   line = clean_lines.elided[linenum]  # get rid of comments and strings
   2337 
   2338   # Don't try to do spacing checks for operator methods
   2339   line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
   2340 
   2341   # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )".
   2342   # Otherwise not.  Note we only check for non-spaces on *both* sides;
   2343   # sometimes people put non-spaces on one side when aligning ='s among
   2344   # many lines (not that this is behavior that I approve of...)
   2345   if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line):
   2346     error(filename, linenum, 'whitespace/operators', 4,
   2347           'Missing spaces around =')
   2348 
   2349   # It's ok not to have spaces around binary operators like + - * /, but if
   2350   # there's too little whitespace, we get concerned.  It's hard to tell,
   2351   # though, so we punt on this one for now.  TODO.
   2352 
   2353   # You should always have whitespace around binary operators.
   2354   #
   2355   # Check <= and >= first to avoid false positives with < and >, then
   2356   # check non-include lines for spacing around < and >.
   2357   match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line)
   2358   if match:
   2359     error(filename, linenum, 'whitespace/operators', 3,
   2360           'Missing spaces around %s' % match.group(1))
   2361   # We allow no-spaces around << when used like this: 10<<20, but
   2362   # not otherwise (particularly, not when used as streams)
   2363   match = Search(r'(\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line)
   2364   if match and not (match.group(1).isdigit() and match.group(2).isdigit()):
   2365     error(filename, linenum, 'whitespace/operators', 3,
   2366           'Missing spaces around <<')
   2367   elif not Match(r'#.*include', line):
   2368     # Avoid false positives on ->
   2369     reduced_line = line.replace('->', '')
   2370 
   2371     # Look for < that is not surrounded by spaces.  This is only
   2372     # triggered if both sides are missing spaces, even though
   2373     # technically should should flag if at least one side is missing a
   2374     # space.  This is done to avoid some false positives with shifts.
   2375     match = Search(r'[^\s<]<([^\s=<].*)', reduced_line)
   2376     if (match and
   2377         not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))):
   2378       error(filename, linenum, 'whitespace/operators', 3,
   2379             'Missing spaces around <')
   2380 
   2381     # Look for > that is not surrounded by spaces.  Similar to the
   2382     # above, we only trigger if both sides are missing spaces to avoid
   2383     # false positives with shifts.
   2384     match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line)
   2385     if (match and
   2386         not FindPreviousMatchingAngleBracket(clean_lines, linenum,
   2387                                              match.group(1))):
   2388       error(filename, linenum, 'whitespace/operators', 3,
   2389             'Missing spaces around >')
   2390 
   2391   # We allow no-spaces around >> for almost anything.  This is because
   2392   # C++11 allows ">>" to close nested templates, which accounts for
   2393   # most cases when ">>" is not followed by a space.
   2394   #
   2395   # We still warn on ">>" followed by alpha character, because that is
   2396   # likely due to ">>" being used for right shifts, e.g.:
   2397   #   value >> alpha
   2398   #
   2399   # When ">>" is used to close templates, the alphanumeric letter that
   2400   # follows would be part of an identifier, and there should still be
   2401   # a space separating the template type and the identifier.
   2402   #   type<type<type>> alpha
   2403   match = Search(r'>>[a-zA-Z_]', line)
   2404   if match:
   2405     error(filename, linenum, 'whitespace/operators', 3,
   2406           'Missing spaces around >>')
   2407 
   2408   # There shouldn't be space around unary operators
   2409   match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
   2410   if match:
   2411     error(filename, linenum, 'whitespace/operators', 4,
   2412           'Extra space for operator %s' % match.group(1))
   2413 
   2414   # A pet peeve of mine: no spaces after an if, while, switch, or for
   2415   match = Search(r' (if\(|for\(|while\(|switch\()', line)
   2416   if match:
   2417     error(filename, linenum, 'whitespace/parens', 5,
   2418           'Missing space before ( in %s' % match.group(1))
   2419 
   2420   # For if/for/while/switch, the left and right parens should be
   2421   # consistent about how many spaces are inside the parens, and
   2422   # there should either be zero or one spaces inside the parens.
   2423   # We don't want: "if ( foo)" or "if ( foo   )".
   2424   # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
   2425   match = Search(r'\b(if|for|while|switch)\s*'
   2426                  r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$',
   2427                  line)
   2428   if match:
   2429     if len(match.group(2)) != len(match.group(4)):
   2430       if not (match.group(3) == ';' and
   2431               len(match.group(2)) == 1 + len(match.group(4)) or
   2432               not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)):
   2433         error(filename, linenum, 'whitespace/parens', 5,
   2434               'Mismatching spaces inside () in %s' % match.group(1))
   2435     if not len(match.group(2)) in [0, 1]:
   2436       error(filename, linenum, 'whitespace/parens', 5,
   2437             'Should have zero or one spaces inside ( and ) in %s' %
   2438             match.group(1))
   2439 
   2440   # You should always have a space after a comma (either as fn arg or operator)
   2441   if Search(r',[^\s]', line):
   2442     error(filename, linenum, 'whitespace/comma', 3,
   2443           'Missing space after ,')
   2444 
   2445   # You should always have a space after a semicolon
   2446   # except for few corner cases
   2447   # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more
   2448   # space after ;
   2449   if Search(r';[^\s};\\)/]', line):
   2450     error(filename, linenum, 'whitespace/semicolon', 3,
   2451           'Missing space after ;')
   2452 
   2453   # Next we will look for issues with function calls.
   2454   CheckSpacingForFunctionCall(filename, line, linenum, error)
   2455 
   2456   # Except after an opening paren, or after another opening brace (in case of
   2457   # an initializer list, for instance), you should have spaces before your
   2458   # braces. And since you should never have braces at the beginning of a line,
   2459   # this is an easy test.
   2460   if Search(r'[^ ({]{', line):
   2461     error(filename, linenum, 'whitespace/braces', 5,
   2462           'Missing space before {')
   2463 
   2464   # Make sure '} else {' has spaces.
   2465   if Search(r'}else', line):
   2466     error(filename, linenum, 'whitespace/braces', 5,
   2467           'Missing space before else')
   2468 
   2469   # You shouldn't have spaces before your brackets, except maybe after
   2470   # 'delete []' or 'new char * []'.
   2471   if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
   2472     error(filename, linenum, 'whitespace/braces', 5,
   2473           'Extra space before [')
   2474 
   2475   # You shouldn't have a space before a semicolon at the end of the line.
   2476   # There's a special case for "for" since the style guide allows space before
   2477   # the semicolon there.
   2478   if Search(r':\s*;\s*$', line):
   2479     error(filename, linenum, 'whitespace/semicolon', 5,
   2480           'Semicolon defining empty statement. Use {} instead.')
   2481   elif Search(r'^\s*;\s*$', line):
   2482     error(filename, linenum, 'whitespace/semicolon', 5,
   2483           'Line contains only semicolon. If this should be an empty statement, '
   2484           'use {} instead.')
   2485   elif (Search(r'\s+;\s*$', line) and
   2486         not Search(r'\bfor\b', line)):
   2487     error(filename, linenum, 'whitespace/semicolon', 5,
   2488           'Extra space before last semicolon. If this should be an empty '
   2489           'statement, use {} instead.')
   2490 
   2491   # In range-based for, we wanted spaces before and after the colon, but
   2492   # not around "::" tokens that might appear.
   2493   if (Search('for *\(.*[^:]:[^: ]', line) or
   2494       Search('for *\(.*[^: ]:[^:]', line)):
   2495     error(filename, linenum, 'whitespace/forcolon', 2,
   2496           'Missing space around colon in range-based for loop')
   2497 
   2498 
   2499 def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
   2500   """Checks for additional blank line issues related to sections.
   2501 
   2502   Currently the only thing checked here is blank line before protected/private.
   2503 
   2504   Args:
   2505     filename: The name of the current file.
   2506     clean_lines: A CleansedLines instance containing the file.
   2507     class_info: A _ClassInfo objects.
   2508     linenum: The number of the line to check.
   2509     error: The function to call with any errors found.
   2510   """
   2511   # Skip checks if the class is small, where small means 25 lines or less.
   2512   # 25 lines seems like a good cutoff since that's the usual height of
   2513   # terminals, and any class that can't fit in one screen can't really
   2514   # be considered "small".
   2515   #
   2516   # Also skip checks if we are on the first line.  This accounts for
   2517   # classes that look like
   2518   #   class Foo { public: ... };
   2519   #
   2520   # If we didn't find the end of the class, last_line would be zero,
   2521   # and the check will be skipped by the first condition.
   2522   if (class_info.last_line - class_info.starting_linenum <= 24 or
   2523       linenum <= class_info.starting_linenum):
   2524     return
   2525 
   2526   matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum])
   2527   if matched:
   2528     # Issue warning if the line before public/protected/private was
   2529     # not a blank line, but don't do this if the previous line contains
   2530     # "class" or "struct".  This can happen two ways:
   2531     #  - We are at the beginning of the class.
   2532     #  - We are forward-declaring an inner class that is semantically
   2533     #    private, but needed to be public for implementation reasons.
   2534     # Also ignores cases where the previous line ends with a backslash as can be
   2535     # common when defining classes in C macros.
   2536     prev_line = clean_lines.lines[linenum - 1]
   2537     if (not IsBlankLine(prev_line) and
   2538         not Search(r'\b(class|struct)\b', prev_line) and
   2539         not Search(r'\\$', prev_line)):
   2540       # Try a bit harder to find the beginning of the class.  This is to
   2541       # account for multi-line base-specifier lists, e.g.:
   2542       #   class Derived
   2543       #       : public Base {
   2544       end_class_head = class_info.starting_linenum
   2545       for i in range(class_info.starting_linenum, linenum):
   2546         if Search(r'\{\s*$', clean_lines.lines[i]):
   2547           end_class_head = i
   2548           break
   2549       if end_class_head < linenum - 1:
   2550         error(filename, linenum, 'whitespace/blank_line', 3,
   2551               '"%s:" should be preceded by a blank line' % matched.group(1))
   2552 
   2553 
   2554 def GetPreviousNonBlankLine(clean_lines, linenum):
   2555   """Return the most recent non-blank line and its line number.
   2556 
   2557   Args:
   2558     clean_lines: A CleansedLines instance containing the file contents.
   2559     linenum: The number of the line to check.
   2560 
   2561   Returns:
   2562     A tuple with two elements.  The first element is the contents of the last
   2563     non-blank line before the current line, or the empty string if this is the
   2564     first non-blank line.  The second is the line number of that line, or -1
   2565     if this is the first non-blank line.
   2566   """
   2567 
   2568   prevlinenum = linenum - 1
   2569   while prevlinenum >= 0:
   2570     prevline = clean_lines.elided[prevlinenum]
   2571     if not IsBlankLine(prevline):     # if not a blank line...
   2572       return (prevline, prevlinenum)
   2573     prevlinenum -= 1
   2574   return ('', -1)
   2575 
   2576 
   2577 def CheckBraces(filename, clean_lines, linenum, error):
   2578   """Looks for misplaced braces (e.g. at the end of line).
   2579 
   2580   Args:
   2581     filename: The name of the current file.
   2582     clean_lines: A CleansedLines instance containing the file.
   2583     linenum: The number of the line to check.
   2584     error: The function to call with any errors found.
   2585   """
   2586 
   2587   line = clean_lines.elided[linenum]        # get rid of comments and strings
   2588 
   2589   if Match(r'\s*{\s*$', line):
   2590     # We allow an open brace to start a line in the case where someone
   2591     # is using braces in a block to explicitly create a new scope,
   2592     # which is commonly used to control the lifetime of
   2593     # stack-allocated variables.  We don't detect this perfectly: we
   2594     # just don't complain if the last non-whitespace character on the
   2595     # previous non-blank line is ';', ':', '{', or '}', or if the previous
   2596     # line starts a preprocessor block.
   2597     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   2598     if (not Search(r'[;:}{]\s*$', prevline) and
   2599         not Match(r'\s*#', prevline)):
   2600       error(filename, linenum, 'whitespace/braces', 4,
   2601             '{ should almost always be at the end of the previous line')
   2602 
   2603   # An else clause should be on the same line as the preceding closing brace.
   2604   if Match(r'\s*else\s*', line):
   2605     prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
   2606     if Match(r'\s*}\s*$', prevline):
   2607       error(filename, linenum, 'whitespace/newline', 4,
   2608             'An else should appear on the same line as the preceding }')
   2609 
   2610   # If braces come on one side of an else, they should be on both.
   2611   # However, we have to worry about "else if" that spans multiple lines!
   2612   if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
   2613     if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
   2614       # find the ( after the if
   2615       pos = line.find('else if')
   2616       pos = line.find('(', pos)
   2617       if pos > 0:
   2618         (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
   2619         if endline[endpos:].find('{') == -1:    # must be brace after if
   2620           error(filename, linenum, 'readability/braces', 5,
   2621                 'If an else has a brace on one side, it should have it on both')
   2622     else:            # common case: else not followed by a multi-line if
   2623       error(filename, linenum, 'readability/braces', 5,
   2624             'If an else has a brace on one side, it should have it on both')
   2625 
   2626   # Likewise, an else should never have the else clause on the same line
   2627   if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
   2628     error(filename, linenum, 'whitespace/newline', 4,
   2629           'Else clause should never be on same line as else (use 2 lines)')
   2630 
   2631   # In the same way, a do/while should never be on one line
   2632   if Match(r'\s*do [^\s{]', line):
   2633     error(filename, linenum, 'whitespace/newline', 4,
   2634           'do/while clauses should not be on a single line')
   2635 
   2636   # Braces shouldn't be followed by a ; unless they're defining a struct
   2637   # or initializing an array.
   2638   # We can't tell in general, but we can for some common cases.
   2639   prevlinenum = linenum
   2640   while True:
   2641     (prevline, prevlinenum) = GetPreviousNonBlankLine(clean_lines, prevlinenum)
   2642     if Match(r'\s+{.*}\s*;', line) and not prevline.count(';'):
   2643       line = prevline + line
   2644     else:
   2645       break
   2646   if (Search(r'{.*}\s*;', line) and
   2647       line.count('{') == line.count('}') and
   2648       not Search(r'struct|class|enum|\s*=\s*{', line)):
   2649     error(filename, linenum, 'readability/braces', 4,
   2650           "You don't need a ; after a }")
   2651 
   2652 
   2653 def CheckEmptyLoopBody(filename, clean_lines, linenum, error):
   2654   """Loop for empty loop body with only a single semicolon.
   2655 
   2656   Args:
   2657     filename: The name of the current file.
   2658     clean_lines: A CleansedLines instance containing the file.
   2659     linenum: The number of the line to check.
   2660     error: The function to call with any errors found.
   2661   """
   2662 
   2663   # Search for loop keywords at the beginning of the line.  Because only
   2664   # whitespaces are allowed before the keywords, this will also ignore most
   2665   # do-while-loops, since those lines should start with closing brace.
   2666   line = clean_lines.elided[linenum]
   2667   if Match(r'\s*(for|while)\s*\(', line):
   2668     # Find the end of the conditional expression
   2669     (end_line, end_linenum, end_pos) = CloseExpression(
   2670         clean_lines, linenum, line.find('('))
   2671 
   2672     # Output warning if what follows the condition expression is a semicolon.
   2673     # No warning for all other cases, including whitespace or newline, since we
   2674     # have a separate check for semicolons preceded by whitespace.
   2675     if end_pos >= 0 and Match(r';', end_line[end_pos:]):
   2676       error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
   2677             'Empty loop bodies should use {} or continue')
   2678 
   2679 
   2680 def ReplaceableCheck(operator, macro, line):
   2681   """Determine whether a basic CHECK can be replaced with a more specific one.
   2682 
   2683   For example suggest using CHECK_EQ instead of CHECK(a == b) and
   2684   similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
   2685 
   2686   Args:
   2687     operator: The C++ operator used in the CHECK.
   2688     macro: The CHECK or EXPECT macro being called.
   2689     line: The current source line.
   2690 
   2691   Returns:
   2692     True if the CHECK can be replaced with a more specific one.
   2693   """
   2694 
   2695   # This matches decimal and hex integers, strings, and chars (in that order).
   2696   match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
   2697 
   2698   # Expression to match two sides of the operator with something that
   2699   # looks like a literal, since CHECK(x == iterator) won't compile.
   2700   # This means we can't catch all the cases where a more specific
   2701   # CHECK is possible, but it's less annoying than dealing with
   2702   # extraneous warnings.
   2703   match_this = (r'\s*' + macro + r'\((\s*' +
   2704                 match_constant + r'\s*' + operator + r'[^<>].*|'
   2705                 r'.*[^<>]' + operator + r'\s*' + match_constant +
   2706                 r'\s*\))')
   2707 
   2708   # Don't complain about CHECK(x == NULL) or similar because
   2709   # CHECK_EQ(x, NULL) won't compile (requires a cast).
   2710   # Also, don't complain about more complex boolean expressions
   2711   # involving && or || such as CHECK(a == b || c == d).
   2712   return Match(match_this, line) and not Search(r'NULL|&&|\|\|', line)
   2713 
   2714 
   2715 def CheckCheck(filename, clean_lines, linenum, error):
   2716   """Checks the use of CHECK and EXPECT macros.
   2717 
   2718   Args:
   2719     filename: The name of the current file.
   2720     clean_lines: A CleansedLines instance containing the file.
   2721     linenum: The number of the line to check.
   2722     error: The function to call with any errors found.
   2723   """
   2724 
   2725   # Decide the set of replacement macros that should be suggested
   2726   raw_lines = clean_lines.raw_lines
   2727   current_macro = ''
   2728   for macro in _CHECK_MACROS:
   2729     if raw_lines[linenum].find(macro) >= 0:
   2730       current_macro = macro
   2731       break
   2732   if not current_macro:
   2733     # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
   2734     return
   2735 
   2736   line = clean_lines.elided[linenum]        # get rid of comments and strings
   2737 
   2738   # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
   2739   for operator in ['==', '!=', '>=', '>', '<=', '<']:
   2740     if ReplaceableCheck(operator, current_macro, line):
   2741       error(filename, linenum, 'readability/check', 2,
   2742             'Consider using %s instead of %s(a %s b)' % (
   2743                 _CHECK_REPLACEMENT[current_macro][operator],
   2744                 current_macro, operator))
   2745       break
   2746 
   2747 
   2748 def CheckAltTokens(filename, clean_lines, linenum, error):
   2749   """Check alternative keywords being used in boolean expressions.
   2750 
   2751   Args:
   2752     filename: The name of the current file.
   2753     clean_lines: A CleansedLines instance containing the file.
   2754     linenum: The number of the line to check.
   2755     error: The function to call with any errors found.
   2756   """
   2757   line = clean_lines.elided[linenum]
   2758 
   2759   # Avoid preprocessor lines
   2760   if Match(r'^\s*#', line):
   2761     return
   2762 
   2763   # Last ditch effort to avoid multi-line comments.  This will not help
   2764   # if the comment started before the current line or ended after the
   2765   # current line, but it catches most of the false positives.  At least,
   2766   # it provides a way to workaround this warning for people who use
   2767   # multi-line comments in preprocessor macros.
   2768   #
   2769   # TODO(unknown): remove this once cpplint has better support for
   2770   # multi-line comments.
   2771   if line.find('/*') >= 0 or line.find('*/') >= 0:
   2772     return
   2773 
   2774   for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
   2775     error(filename, linenum, 'readability/alt_tokens', 2,
   2776           'Use operator %s instead of %s' % (
   2777               _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))
   2778 
   2779 
   2780 def GetLineWidth(line):
   2781   """Determines the width of the line in column positions.
   2782 
   2783   Args:
   2784     line: A string, which may be a Unicode string.
   2785 
   2786   Returns:
   2787     The width of the line in column positions, accounting for Unicode
   2788     combining characters and wide characters.
   2789   """
   2790   if isinstance(line, unicode):
   2791     width = 0
   2792     for uc in unicodedata.normalize('NFC', line):
   2793       if unicodedata.east_asian_width(uc) in ('W', 'F'):
   2794         width += 2
   2795       elif not unicodedata.combining(uc):
   2796         width += 1
   2797     return width
   2798   else:
   2799     return len(line)
   2800 
   2801 
   2802 def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
   2803                error):
   2804   """Checks rules from the 'C++ style rules' section of cppguide.html.
   2805 
   2806   Most of these rules are hard to test (naming, comment style), but we
   2807   do what we can.  In particular we check for 2-space indents, line lengths,
   2808   tab usage, spaces inside code, etc.
   2809 
   2810   Args:
   2811     filename: The name of the current file.
   2812     clean_lines: A CleansedLines instance containing the file.
   2813     linenum: The number of the line to check.
   2814     file_extension: The extension (without the dot) of the filename.
   2815     nesting_state: A _NestingState instance which maintains information about
   2816                    the current stack of nested blocks being parsed.
   2817     error: The function to call with any errors found.
   2818   """
   2819 
   2820   raw_lines = clean_lines.raw_lines
   2821   line = raw_lines[linenum]
   2822 
   2823   if line.find('\t') != -1:
   2824     error(filename, linenum, 'whitespace/tab', 1,
   2825           'Tab found; better to use spaces')
   2826 
   2827   # One or three blank spaces at the beginning of the line is weird; it's
   2828   # hard to reconcile that with 2-space indents.
   2829   # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
   2830   # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
   2831   # if(RLENGTH > 20) complain = 0;
   2832   # if(match($0, " +(error|private|public|protected):")) complain = 0;
   2833   # if(match(prev, "&& *$")) complain = 0;
   2834   # if(match(prev, "\\|\\| *$")) complain = 0;
   2835   # if(match(prev, "[\",=><] *$")) complain = 0;
   2836   # if(match($0, " <<")) complain = 0;
   2837   # if(match(prev, " +for \\(")) complain = 0;
   2838   # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
   2839   initial_spaces = 0
   2840   cleansed_line = clean_lines.elided[linenum]
   2841   while initial_spaces < len(line) and line[initial_spaces] == ' ':
   2842     initial_spaces += 1
   2843   if line and line[-1].isspace():
   2844     error(filename, linenum, 'whitespace/end_of_line', 4,
   2845           'Line ends in whitespace.  Consider deleting these extra spaces.')
   2846   # There are certain situations we allow one space, notably for labels
   2847   elif ((initial_spaces == 1 or initial_spaces == 3) and
   2848         not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
   2849     error(filename, linenum, 'whitespace/indent', 3,
   2850           'Weird number of spaces at line-start.  '
   2851           'Are you using a 2-space indent?')
   2852   # Labels should always be indented at least one space.
   2853   elif not initial_spaces and line[:2] != '//' and Search(r'[^:]:\s*$',
   2854                                                           line):
   2855     error(filename, linenum, 'whitespace/labels', 4,
   2856           'Labels should always be indented at least one space.  '
   2857           'If this is a member-initializer list in a constructor or '
   2858           'the base class list in a class definition, the colon should '
   2859           'be on the following line.')
   2860 
   2861 
   2862   # Check if the line is a header guard.
   2863   is_header_guard = False
   2864   if file_extension == 'h':
   2865     cppvar = GetHeaderGuardCPPVariable(filename)
   2866     if (line.startswith('#ifndef %s' % cppvar) or
   2867         line.startswith('#define %s' % cppvar) or
   2868         line.startswith('#endif  // %s' % cppvar)):
   2869       is_header_guard = True
   2870   # #include lines and header guards can be long, since there's no clean way to
   2871   # split them.
   2872   #
   2873   # URLs can be long too.  It's possible to split these, but it makes them
   2874   # harder to cut&paste.
   2875   #
   2876   # The "$Id:...$" comment may also get very long without it being the
   2877   # developers fault.
   2878   if (not line.startswith('#include') and not is_header_guard and
   2879       not Match(r'^\s*//.*http(s?)://\S*$', line) and
   2880       not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
   2881     line_width = GetLineWidth(line)
   2882     if line_width > 100:
   2883       error(filename, linenum, 'whitespace/line_length', 4,
   2884             'Lines should very rarely be longer than 100 characters')
   2885     elif line_width > 80:
   2886       error(filename, linenum, 'whitespace/line_length', 2,
   2887             'Lines should be <= 80 characters long')
   2888 
   2889   if (cleansed_line.count(';') > 1 and
   2890       # for loops are allowed two ;'s (and may run over two lines).
   2891       cleansed_line.find('for') == -1 and
   2892       (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
   2893        GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
   2894       # It's ok to have many commands in a switch case that fits in 1 line
   2895       not ((cleansed_line.find('case ') != -1 or
   2896             cleansed_line.find('default:') != -1) and
   2897            cleansed_line.find('break;') != -1)):
   2898     error(filename, linenum, 'whitespace/newline', 0,
   2899           'More than one command on the same line')
   2900 
   2901   # Some more style checks
   2902   CheckBraces(filename, clean_lines, linenum, error)
   2903   CheckEmptyLoopBody(filename, clean_lines, linenum, error)
   2904   CheckAccess(filename, clean_lines, linenum, nesting_state, error)
   2905   CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
   2906   CheckCheck(filename, clean_lines, linenum, error)
   2907   CheckAltTokens(filename, clean_lines, linenum, error)
   2908   classinfo = nesting_state.InnermostClass()
   2909   if classinfo:
   2910     CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)
   2911 
   2912 
# Matches a quoted #include of a .h file whose path contains no '/', i.e. a
# header named without its directory.
_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
# Matches a whole #include line.  group(1) is the opening delimiter (< or ")
# and group(2) is the text between the delimiters.
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
# A '.' also ends the component, and a name that starts with '-', '_' or '.'
# does not match at all.
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
   2921 
   2922 
   2923 def _DropCommonSuffixes(filename):
   2924   """Drops common suffixes like _test.cc or -inl.h from filename.
   2925 
   2926   For example:
   2927     >>> _DropCommonSuffixes('foo/foo-inl.h')
   2928     'foo/foo'
   2929     >>> _DropCommonSuffixes('foo/bar/foo.cc')
   2930     'foo/bar/foo'
   2931     >>> _DropCommonSuffixes('foo/foo_internal.h')
   2932     'foo/foo'
   2933     >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
   2934     'foo/foo_unusualinternal'
   2935 
   2936   Args:
   2937     filename: The input filename.
   2938 
   2939   Returns:
   2940     The filename with the common suffix removed.
   2941   """
   2942   for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
   2943                  'inl.h', 'impl.h', 'internal.h'):
   2944     if (filename.endswith(suffix) and len(filename) > len(suffix) and
   2945         filename[-len(suffix) - 1] in ('-', '_')):
   2946       return filename[:-len(suffix) - 1]
   2947   return os.path.splitext(filename)[0]
   2948 
   2949 
   2950 def _IsTestFilename(filename):
   2951   """Determines if the given filename has a suffix that identifies it as a test.
   2952 
   2953   Args:
   2954     filename: The input filename.
   2955 
   2956   Returns:
   2957     True if 'filename' looks like a test, False otherwise.
   2958   """
   2959   if (filename.endswith('_test.cc') or
   2960       filename.endswith('_unittest.cc') or
   2961       filename.endswith('_regtest.cc')):
   2962     return True
   2963   else:
   2964     return False
   2965 
   2966 
   2967 def _ClassifyInclude(fileinfo, include, is_system):
   2968   """Figures out what kind of header 'include' is.
   2969 
   2970   Args:
   2971     fileinfo: The current file cpplint is running over. A FileInfo instance.
   2972     include: The path to a #included file.
   2973     is_system: True if the #include used <> rather than "".
   2974 
   2975   Returns:
   2976     One of the _XXX_HEADER constants.
   2977 
   2978   For example:
   2979     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
   2980     _C_SYS_HEADER
   2981     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
   2982     _CPP_SYS_HEADER
   2983     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
   2984     _LIKELY_MY_HEADER
   2985     >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
   2986     ...                  'bar/foo_other_ext.h', False)
   2987     _POSSIBLE_MY_HEADER
   2988     >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
   2989     _OTHER_HEADER
   2990   """
   2991   # This is a list of all standard c++ header files, except
   2992   # those already checked for above.
   2993   is_stl_h = include in _STL_HEADERS
   2994   is_cpp_h = is_stl_h or include in _CPP_HEADERS
   2995 
   2996   if is_system:
   2997     if is_cpp_h:
   2998       return _CPP_SYS_HEADER
   2999     else:
   3000       return _C_SYS_HEADER
   3001 
   3002   # If the target file and the include we're checking share a
   3003   # basename when we drop common extensions, and the include
   3004   # lives in . , then it's likely to be owned by the target file.
   3005   target_dir, target_base = (
   3006       os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
   3007   include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
   3008   if target_base == include_base and (
   3009       include_dir == target_dir or
   3010       include_dir == os.path.normpath(target_dir + '/../public')):
   3011     return _LIKELY_MY_HEADER
   3012 
   3013   # If the target and include share some initial basename
   3014   # component, it's possible the target is implementing the
   3015   # include, so it's allowed to be first, but we'll never
   3016   # complain if it's not there.
   3017   target_first_component = _RE_FIRST_COMPONENT.match(target_base)
   3018   include_first_component = _RE_FIRST_COMPONENT.match(include_base)
   3019   if (target_first_component and include_first_component and
   3020       target_first_component.group(0) ==
   3021       include_first_component.group(0)):
   3022     return _POSSIBLE_MY_HEADER
   3023 
   3024   return _OTHER_HEADER
   3025 
   3026 
   3027 
   3028 def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
   3029   """Check rules that are applicable to #include lines.
   3030 
   3031   Strings on #include lines are NOT removed from elided line, to make
   3032   certain tasks easier. However, to prevent false positives, checks
   3033   applicable to #include lines in CheckLanguage must be put here.
   3034 
   3035   Args:
   3036     filename: The name of the current file.
   3037     clean_lines: A CleansedLines instance containing the file.
   3038     linenum: The number of the line to check.
   3039     include_state: An _IncludeState instance in which the headers are inserted.
   3040     error: The function to call with any errors found.
   3041   """
   3042   fileinfo = FileInfo(filename)
   3043 
   3044   line = clean_lines.lines[linenum]
   3045 
   3046   # "include" should use the new style "foo/bar.h" instead of just "bar.h"
   3047   if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
   3048     error(filename, linenum, 'build/include', 4,
   3049           'Include the directory when naming .h files')
   3050 
   3051   # we shouldn't include a file more than once. actually, there are a
   3052   # handful of instances where doing so is okay, but in general it's
   3053   # not.
   3054   match = _RE_PATTERN_INCLUDE.search(line)
   3055   if match:
   3056     include = match.group(2)
   3057     is_system = (match.group(1) == '<')
   3058     if include in include_state:
   3059       error(filename, linenum, 'build/include', 4,
   3060             '"%s" already included at %s:%s' %
   3061             (include, filename, include_state[include]))
   3062     else:
   3063       include_state[include] = linenum
   3064 
   3065       # We want to ensure that headers appear in the right order:
   3066       # 1) for foo.cc, foo.h  (preferred location)
   3067       # 2) c system files
   3068       # 3) cpp system files
   3069       # 4) for foo.cc, foo.h  (deprecated location)
   3070       # 5) other google headers
   3071       #
   3072       # We classify each include statement as one of those 5 types
   3073       # using a number of techniques. The include_state object keeps
   3074       # track of the highest type seen, and complains if we see a
   3075       # lower type after that.
   3076       error_message = include_state.CheckNextIncludeOrder(
   3077           _ClassifyInclude(fileinfo, include, is_system))
   3078       if error_message:
   3079         error(filename, linenum, 'build/include_order', 4,
   3080               '%s. Should be: %s.h, c system, c++ system, other.' %
   3081               (error_message, fileinfo.BaseName()))
   3082       if not include_state.IsInAlphabeticalOrder(include):
   3083         error(filename, linenum, 'build/include_alpha', 4,
   3084               'Include "%s" not in alphabetical order' % include)
   3085 
   3086   # Look for any of the stream classes that are part of standard C++.
   3087   match = _RE_PATTERN_INCLUDE.match(line)
   3088   if match:
   3089     include = match.group(2)
   3090     if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
   3091       # Many unit tests use cout, so we exempt them.
   3092       if not _IsTestFilename(filename):
   3093         error(filename, linenum, 'readability/streams', 3,
   3094               'Streams are highly discouraged.')
   3095 
   3096 
   3097 def _GetTextInside(text, start_pattern):
   3098   """Retrieves all the text between matching open and close parentheses.
   3099 
   3100   Given a string of lines and a regular expression string, retrieve all the text
   3101   following the expression and between opening punctuation symbols like
   3102   (, [, or {, and the matching close-punctuation symbol. This properly nested
   3103   occurrences of the punctuations, so for the text like
   3104     printf(a(), b(c()));
   3105   a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
   3106   start_pattern must match string having an open punctuation symbol at the end.
   3107 
   3108   Args:
   3109     text: The lines to extract text. Its comments and strings must be elided.
   3110            It can be single line and can span multiple lines.
   3111     start_pattern: The regexp string indicating where to start extracting
   3112                    the text.
   3113   Returns:
   3114     The extracted text.
   3115     None if either the opening string or ending punctuation could not be found.
   3116   """
   3117   # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
   3118   # rewritten to use _GetTextInside (and use inferior regexp matching today).
   3119 
   3120   # Give opening punctuations to get the matching close-punctuations.
   3121   matching_punctuation = {'(': ')', '{': '}', '[': ']'}
   3122   closing_punctuation = set(matching_punctuation.itervalues())
   3123 
   3124   # Find the position to start extracting text.
   3125   match = re.search(start_pattern, text, re.M)
   3126   if not match:  # start_pattern not found in text.
   3127     return None
   3128   start_position = match.end(0)
   3129 
   3130   assert start_position > 0, (
   3131       'start_pattern must ends with an opening punctuation.')
   3132   assert text[start_position - 1] in matching_punctuation, (
   3133       'start_pattern must ends with an opening punctuation.')
   3134   # Stack of closing punctuations we expect to have in text after position.
   3135   punctuation_stack = [matching_punctuation[text[start_position - 1]]]
   3136   position = start_position
   3137   while punctuation_stack and position < len(text):
   3138     if text[position] == punctuation_stack[-1]:
   3139       punctuation_stack.pop()
   3140     elif text[position] in closing_punctuation:
   3141       # A closing punctuation without matching opening punctuations.
   3142       return None
   3143     elif text[position] in matching_punctuation:
   3144       punctuation_stack.append(matching_punctuation[text[position]])
   3145     position += 1
   3146   if punctuation_stack:
   3147     # Opening punctuations left without matching close-punctuations.
   3148     return None
   3149   # punctuations match.
   3150   return text[start_position:position - 1]
   3151 
   3152 
   3153 def CheckLanguage(filename, clean_lines, linenum, file_extension, include_state,
   3154                   error):
   3155   """Checks rules from the 'C++ language rules' section of cppguide.html.
   3156 
   3157   Some of these rules are hard to test (function overloading, using
   3158   uint32 inappropriately), but we do the best we can.
   3159 
   3160   Args:
   3161     filename: The name of the current file.
   3162     clean_lines: A CleansedLines instance containing the file.
   3163     linenum: The number of the line to check.
   3164     file_extension: The extension (without the dot) of the filename.
   3165     include_state: An _IncludeState instance in which the headers are inserted.
   3166     error: The function to call with any errors found.
   3167   """
   3168   # If the line is empty or consists of entirely a comment, no need to
   3169   # check it.
   3170   line = clean_lines.elided[linenum]
   3171   if not line:
   3172     return
   3173 
   3174   match = _RE_PATTERN_INCLUDE.search(line)
   3175   if match:
   3176     CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
   3177     return
   3178 
   3179   # Create an extended_line, which is the concatenation of the current and
   3180   # next lines, for more effective checking of code that may span more than one
   3181   # line.
   3182   if linenum + 1 < clean_lines.NumLines():
   3183     extended_line = line + clean_lines.elided[linenum + 1]
   3184   else:
   3185     extended_line = line
   3186 
   3187   # Make Windows paths like Unix.
   3188   fullname = os.path.abspath(filename).replace('\\', '/')
   3189 
   3190   # TODO(unknown): figure out if they're using default arguments in fn proto.
   3191 
   3192   # Check for non-const references in functions.  This is tricky because &
   3193   # is also used to take the address of something.  We allow <> for templates,
   3194   # (ignoring whatever is between the braces) and : for classes.
   3195   # These are complicated re's.  They try to capture the following:
   3196   # paren (for fn-prototype start), typename, &, varname.  For the const
   3197   # version, we're willing for const to be before typename or after
   3198   # Don't check the implementation on same line.
   3199   fnline = line.split('{', 1)[0]
   3200   if (len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) >
   3201       len(re.findall(r'\([^()]*\bconst\s+(?:typename\s+)?(?:struct\s+)?'
   3202                      r'(?:[\w:]|<[^()]*>)+(\s?&|&\s?)\w+', fnline)) +
   3203       len(re.findall(r'\([^()]*\b(?:[\w:]|<[^()]*>)+\s+const(\s?&|&\s?)[\w]+',
   3204                      fnline))):
   3205 
   3206     # We allow non-const references in a few standard places, like functions
   3207     # called "swap()" or iostream operators like "<<" or ">>". We also filter
   3208     # out for loops, which lint otherwise mistakenly thinks are functions.
   3209     if not Search(
   3210         r'(for|swap|Swap|operator[<>][<>])\s*\(\s*'
   3211         r'(?:(?:typename\s*)?[\w:]|<.*>)+\s*&',
   3212         fnline):
   3213       error(filename, linenum, 'runtime/references', 2,
   3214             'Is this a non-const reference? '
   3215             'If so, make const or use a pointer.')
   3216 
   3217   # Check to see if they're using an conversion function cast.
   3218   # I just try to capture the most common basic types, though there are more.
   3219   # Parameterless conversion functions, such as bool(), are allowed as they are
   3220   # probably a member operator declaration or default constructor.
   3221   match = Search(
   3222       r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
   3223       r'(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
   3224   if match:
   3225     # gMock methods are defined using some variant of MOCK_METHODx(name, type)
   3226     # where type may be float(), int(string), etc.  Without context they are
   3227     # virtually indistinguishable from int(x) casts. Likewise, gMock's
   3228     # MockCallback takes a template parameter of the form return_type(arg_type),
   3229     # which looks much like the cast we're trying to detect.
   3230     # BEGIN android-added
   3231     # The C++ 2011 std::function class template exhibits a similar issue.
   3232     # END android-added
   3233     if (match.group(1) is None and  # If new operator, then this isn't a cast
   3234         not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
   3235              # BEGIN android-changed
   3236              # Match(r'^\s*MockCallback<.*>', line))):
   3237              Match(r'^\s*MockCallback<.*>', line) or
   3238              Match(r'^\s*std::function<.*>', line))):
   3239              # END android-changed
   3240       # Try a bit harder to catch gmock lines: the only place where
   3241       # something looks like an old-style cast is where we declare the
   3242       # return type of the mocked method, and the only time when we
   3243       # are missing context is if MOCK_METHOD was split across
   3244       # multiple lines (for example http://go/hrfhr ), so we only need
   3245       # to check the previous line for MOCK_METHOD.
   3246       if (linenum == 0 or
   3247           not Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(\S+,\s*$',
   3248                     clean_lines.elided[linenum - 1])):
   3249         error(filename, linenum, 'readability/casting', 4,
   3250               'Using deprecated casting style.  '
   3251               'Use static_cast<%s>(...) instead' %
   3252               match.group(2))
   3253 
   3254   CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3255                   'static_cast',
   3256                   r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
   3257 
   3258   # This doesn't catch all cases. Consider (const char * const)"hello".
   3259   #
   3260   # (char *) "foo" should always be a const_cast (reinterpret_cast won't
   3261   # compile).
   3262   if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3263                      'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
   3264     pass
   3265   else:
   3266     # Check pointer casts for other than string constants
   3267     CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
   3268                     'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
   3269 
   3270   # In addition, we look for people taking the address of a cast.  This
   3271   # is dangerous -- casts can assign to temporaries, so the pointer doesn't
   3272   # point where you think.
   3273   if Search(
   3274       r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
   3275     error(filename, linenum, 'runtime/casting', 4,
   3276           ('Are you taking an address of a cast?  '
   3277            'This is dangerous: could be a temp var.  '
   3278            'Take the address before doing the cast, rather than after'))
   3279 
   3280   # Check for people declaring static/global STL strings at the top level.
   3281   # This is dangerous because the C++ language does not guarantee that
   3282   # globals with constructors are initialized before the first access.
   3283   match = Match(
   3284       r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
   3285       line)
   3286   # Make sure it's not a function.
   3287   # Function template specialization looks like: "string foo<Type>(...".
   3288   # Class template definitions look like: "string Foo<Type>::Method(...".
   3289   if match and not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
   3290                          match.group(3)):
   3291     error(filename, linenum, 'runtime/string', 4,
   3292           'For a static/global string constant, use a C style string instead: '
   3293           '"%schar %s[]".' %
   3294           (match.group(1), match.group(2)))
   3295 
   3296   # Check that we're not using RTTI outside of testing code.
   3297   if Search(r'\bdynamic_cast<', line) and not _IsTestFilename(filename):
   3298     error(filename, linenum, 'runtime/rtti', 5,
   3299           'Do not use dynamic_cast<>.  If you need to cast within a class '
   3300           "hierarchy, use static_cast<> to upcast.  Google doesn't support "
   3301           'RTTI.')
   3302 
   3303   if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
   3304     error(filename, linenum, 'runtime/init', 4,
   3305           'You seem to be initializing a member variable with itself.')
   3306 
   3307   if file_extension == 'h':
   3308     # TODO(unknown): check that 1-arg constructors are explicit.
   3309     #                How to tell it's a constructor?
   3310     #                (handled in CheckForNonStandardConstructs for now)
   3311     # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
   3312     #                (level 1 error)
   3313     pass
   3314 
   3315   # Check if people are using the verboten C basic types.  The only exception
   3316   # we regularly allow is "unsigned short port" for port.
   3317   if Search(r'\bshort port\b', line):
   3318     if not Search(r'\bunsigned short port\b', line):
   3319       error(filename, linenum, 'runtime/int', 4,
   3320             'Use "unsigned short" for ports, not "short"')
   3321   else:
   3322     match = Search(r'\b(short|long(?! +double)|long long)\b', line)
   3323     if match:
   3324       error(filename, linenum, 'runtime/int', 4,
   3325             'Use int16/int64/etc, rather than the C type %s' % match.group(1))
   3326 
   3327   # When snprintf is used, the second argument shouldn't be a literal.
   3328   match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
   3329   if match and match.group(2) != '0':
   3330     # If 2nd arg is zero, snprintf is used to calculate size.
   3331     error(filename, linenum, 'runtime/printf', 3,
   3332           'If you can, use sizeof(%s) instead of %s as the 2nd arg '
   3333           'to snprintf.' % (match.group(1), match.group(2)))
   3334 
   3335   # Check if some verboten C functions are being used.
   3336   if Search(r'\bsprintf\b', line):
   3337     error(filename, linenum, 'runtime/printf', 5,
   3338           'Never use sprintf.  Use snprintf instead.')
   3339   match = Search(r'\b(strcpy|strcat)\b', line)
   3340   if match:
   3341     error(filename, linenum, 'runtime/printf', 4,
   3342           'Almost always, snprintf is better than %s' % match.group(1))
   3343 
   3344   if Search(r'\bsscanf\b', line):
   3345     error(filename, linenum, 'runtime/printf', 1,
   3346           'sscanf can be ok, but is slow and can overflow buffers.')
   3347 
   3348   # Check if some verboten operator overloading is going on
   3349   # TODO(unknown): catch out-of-line unary operator&:
   3350   #   class X {};
   3351   #   int operator&(const X& x) { return 42; }  // unary operator&
   3352   # The trick is it's hard to tell apart from binary operator&:
   3353   #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
   3354   if Search(r'\boperator\s*&\s*\(\s*\)', line):
   3355     error(filename, linenum, 'runtime/operator', 4,
   3356           'Unary operator& is dangerous.  Do not use it.')
   3357 
   3358   # Check for suspicious usage of "if" like
   3359   # } if (a == b) {
   3360   if Search(r'\}\s*if\s*\(', line):
   3361     error(filename, linenum, 'readability/braces', 4,
   3362           'Did you mean "else if"? If not, start a new line for "if".')
   3363 
   3364   # Check for potential format string bugs like printf(foo).
   3365   # We constrain the pattern not to pick things like DocidForPrintf(foo).
   3366   # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
   3367   # TODO(sugawarayu): Catch the following case. Need to change the calling
   3368   # convention of the whole function to process multiple line to handle it.
   3369   #   printf(
   3370   #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
   3371   printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
   3372   if printf_args:
   3373     match = Match(r'([\w.\->()]+)$', printf_args)
   3374     if match and match.group(1) != '__VA_ARGS__':
   3375       function_name = re.search(r'\b((?:string)?printf)\s*\(',
   3376                                 line, re.I).group(1)
   3377       error(filename, linenum, 'runtime/printf', 4,
   3378             'Potential format string bug. Do %s("%%s", %s) instead.'
   3379             % (function_name, match.group(1)))
   3380 
   3381   # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
   3382   match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
   3383   if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
   3384     error(filename, linenum, 'runtime/memset', 4,
   3385           'Did you mean "memset(%s, 0, %s)"?'
   3386           % (match.group(1), match.group(2)))
   3387 
   3388   if Search(r'\busing namespace\b', line):
   3389     error(filename, linenum, 'build/namespaces', 5,
   3390           'Do not use namespace using-directives.  '
   3391           'Use using-declarations instead.')
   3392 
   3393   # Detect variable-length arrays.
   3394   match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
   3395   if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
   3396       match.group(3).find(']') == -1):
   3397     # Split the size using space and arithmetic operators as delimiters.
   3398     # If any of the resulting tokens are not compile time constants then
   3399     # report the error.
   3400     tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
   3401     is_const = True
   3402     skip_next = False
   3403     for tok in tokens:
   3404       if skip_next:
   3405         skip_next = False
   3406         continue
   3407 
   3408       if Search(r'sizeof\(.+\)', tok): continue
   3409       if Search(r'arraysize\(\w+\)', tok): continue
   3410 
   3411       tok = tok.lstrip('(')
   3412       tok = tok.rstrip(')')
   3413       if not tok: continue
   3414       if Match(r'\d+', tok): continue
   3415       if Match(r'0[xX][0-9a-fA-F]+', tok): continue
   3416       if Match(r'k[A-Z0-9]\w*', tok): continue
   3417       if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
   3418       if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
   3419       # A catch all for tricky sizeof cases, including 'sizeof expression',
   3420       # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
   3421       # requires skipping the next token because we split on ' ' and '*'.
   3422       if tok.startswith('sizeof'):
   3423         skip_next = True
   3424         continue
   3425       is_const = False
   3426       break
   3427     if not is_const:
   3428       error(filename, linenum, 'runtime/arrays', 1,
   3429             'Do not use variable-length arrays.  Use an appropriately named '
   3430             "('k' followed by CamelCase) compile-time constant for the size.")
   3431 
   3432   # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
   3433   # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
   3434   # in the class declaration.
   3435   match = Match(
   3436       (r'\s*'
   3437        r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
   3438        r'\(.*\);$'),
   3439       line)
   3440   if match and linenum + 1 < clean_lines.NumLines():
   3441     next_line = clean_lines.elided[linenum + 1]
   3442     # We allow some, but not all, declarations of variables to be present
   3443     # in the statement that defines the class.  The [\w\*,\s]* fragment of
   3444     # the regular expression below allows users to declare instances of
   3445     # the class or pointers to instances, but not less common types such
   3446     # as function pointers or arrays.  It's a tradeoff between allowing
   3447     # reasonable code and avoiding trying to parse more C++ using regexps.
   3448     if not Search(r'^\s*}[\w\*,\s]*;', next_line):
   3449       error(filename, linenum, 'readability/constructors', 3,
   3450             match.group(1) + ' should be the last thing in the class')
   3451 
   3452   # Check for use of unnamed namespaces in header files.  Registration
   3453   # macros are typically OK, so we allow use of "namespace {" on lines
   3454   # that end with backslashes.
   3455   if (file_extension == 'h'
   3456       and Search(r'\bnamespace\s*{', line)
   3457       and line[-1] != '\\'):
   3458     error(filename, linenum, 'build/namespaces', 4,
   3459           'Do not use unnamed namespaces in header files.  See '
   3460           'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
   3461           ' for more information.')
   3462 
   3463 
   3464 def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
   3465                     error):
   3466   """Checks for a C-style cast by looking for the pattern.
   3467 
   3468   This also handles sizeof(type) warnings, due to similarity of content.
   3469 
   3470   Args:
   3471     filename: The name of the current file.
   3472     linenum: The number of the line to check.
   3473     line: The line of code to check.
   3474     raw_line: The raw line of code to check, with comments.
   3475     cast_type: The string for the C++ cast to recommend.  This is either
   3476       reinterpret_cast, static_cast, or const_cast, depending.
   3477     pattern: The regular expression used to find C-style casts.
   3478     error: The function to call with any errors found.
   3479 
   3480   Returns:
   3481     True if an error was emitted.
   3482     False otherwise.
   3483   """
   3484   match = Search(pattern, line)
   3485   if not match:
   3486     return False
   3487 
   3488   # e.g., sizeof(int)
   3489   sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1])
   3490   if sizeof_match:
   3491     error(filename, linenum, 'runtime/sizeof', 1,
   3492           'Using sizeof(type).  Use sizeof(varname) instead if possible')
   3493     return True
   3494 
   3495   # operator++(int) and operator--(int)
   3496   if (line[0:match.start(1) - 1].endswith(' operator++') or
   3497       line[0:match.start(1) - 1].endswith(' operator--')):
   3498     return False
   3499 
   3500   remainder = line[match.end(0):]
   3501 
   3502   # The close paren is for function pointers as arguments to a function.
   3503   # eg, void foo(void (*bar)(int));
   3504   # The semicolon check is a more basic function check; also possibly a
   3505   # function pointer typedef.
   3506   # eg, void foo(int); or void foo(int) const;
   3507   # The equals check is for function pointer assignment.
   3508   # eg, void *(*foo)(int) = ...
   3509   # The > is for MockCallback<...> ...
   3510   #
   3511   # Right now, this will only catch cases where there's a single argument, and
   3512   # it's unnamed.  It should probably be expanded to check for multiple
   3513   # arguments with some unnamed.
   3514   function_match = Match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)|>))', remainder)
   3515   if function_match:
   3516     if (not function_match.group(3) or
   3517         function_match.group(3) == ';' or
   3518         ('MockCallback<' not in raw_line and
   3519          '/*' not in raw_line)):
   3520       error(filename, linenum, 'readability/function', 3,
   3521             'All parameters should be named in a function')
   3522     return True
   3523 
   3524   # At this point, all that should be left is actual casts.
   3525   error(filename, linenum, 'readability/casting', 4,
   3526         'Using C-style cast.  Use %s<%s>(...) instead' %
   3527         (cast_type, match.group(1)))
   3528 
   3529   return True
   3530 
   3531 
   3532 _HEADERS_CONTAINING_TEMPLATES = (
   3533     ('<deque>', ('deque',)),
   3534     ('<functional>', ('unary_function', 'binary_function',
   3535                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
   3536                       'negate',
   3537                       'equal_to', 'not_equal_to', 'greater', 'less',
   3538                       'greater_equal', 'less_equal',
   3539                       'logical_and', 'logical_or', 'logical_not',
   3540                       'unary_negate', 'not1', 'binary_negate', 'not2',
   3541                       'bind1st', 'bind2nd',
   3542                       'pointer_to_unary_function',
   3543                       'pointer_to_binary_function',
   3544                       'ptr_fun',
   3545                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
   3546                       'mem_fun_ref_t',
   3547                       'const_mem_fun_t', 'const_mem_fun1_t',
   3548                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
   3549                       'mem_fun_ref',
   3550                      )),
   3551     ('<limits>', ('numeric_limits',)),
   3552     ('<list>', ('list',)),
   3553     ('<map>', ('map', 'multimap',)),
   3554     ('<memory>', ('allocator',)),
   3555     ('<queue>', ('queue', 'priority_queue',)),
   3556     ('<set>', ('set', 'multiset',)),
   3557     ('<stack>', ('stack',)),
   3558     ('<string>', ('char_traits', 'basic_string',)),
   3559     ('<utility>', ('pair',)),
   3560     ('<vector>', ('vector',)),
   3561 
   3562     # gcc extensions.
   3563     # Note: std::hash is their hash, ::hash is our hash
   3564     ('<hash_map>', ('hash_map', 'hash_multimap',)),
   3565     ('<hash_set>', ('hash_set', 'hash_multiset',)),
   3566     ('<slist>', ('slist',)),
   3567     )
   3568 
   3569 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
   3570 
   3571 _re_pattern_algorithm_header = []
   3572 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
   3573                   'transform'):
   3574   # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
   3575   # type::max().
   3576   _re_pattern_algorithm_header.append(
   3577       (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
   3578        _template,
   3579        '<algorithm>'))
   3580 
   3581 _re_pattern_templates = []
   3582 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
   3583   for _template in _templates:
   3584     _re_pattern_templates.append(
   3585         (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
   3586          _template + '<>',
   3587          _header))
   3588 
   3589 
   3590 def FilesBelongToSameModule(filename_cc, filename_h):
   3591   """Check if these two filenames belong to the same module.
   3592 
   3593   The concept of a 'module' here is a as follows:
   3594   foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
   3595   same 'module' if they are in the same directory.
   3596   some/path/public/xyzzy and some/path/internal/xyzzy are also considered
   3597   to belong to the same module here.
   3598 
   3599   If the filename_cc contains a longer path than the filename_h, for example,
   3600   '/absolute/path/to/base/sysinfo.cc', and this file would include
   3601   'base/sysinfo.h', this function also produces the prefix needed to open the
   3602   header. This is used by the caller of this function to more robustly open the
   3603   header file. We don't have access to the real include paths in this context,
   3604   so we need this guesswork here.
   3605 
   3606   Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
   3607   according to this implementation. Because of this, this function gives
   3608   some false positives. This should be sufficiently rare in practice.
   3609 
   3610   Args:
   3611     filename_cc: is the path for the .cc file
   3612     filename_h: is the path for the header path
   3613 
   3614   Returns:
   3615     Tuple with a bool and a string:
   3616     bool: True if filename_cc and filename_h belong to the same module.
   3617     string: the additional prefix needed to open the header file.
   3618   """
   3619 
   3620   if not filename_cc.endswith('.cc'):
   3621     return (False, '')
   3622   filename_cc = filename_cc[:-len('.cc')]
   3623   if filename_cc.endswith('_unittest'):
   3624     filename_cc = filename_cc[:-len('_unittest')]
   3625   elif filename_cc.endswith('_test'):
   3626     filename_cc = filename_cc[:-len('_test')]
   3627   filename_cc = filename_cc.replace('/public/', '/')
   3628   filename_cc = filename_cc.replace('/internal/', '/')
   3629 
   3630   if not filename_h.endswith('.h'):
   3631     return (False, '')
   3632   filename_h = filename_h[:-len('.h')]
   3633   if filename_h.endswith('-inl'):
   3634     filename_h = filename_h[:-len('-inl')]
   3635   filename_h = filename_h.replace('/public/', '/')
   3636   filename_h = filename_h.replace('/internal/', '/')
   3637 
   3638   files_belong_to_same_module = filename_cc.endswith(filename_h)
   3639   common_path = ''
   3640   if files_belong_to_same_module:
   3641     common_path = filename_cc[:-len(filename_h)]
   3642   return files_belong_to_same_module, common_path
   3643 
   3644 
   3645 def UpdateIncludeState(filename, include_state, io=codecs):
   3646   """Fill up the include_state with new includes found from the file.
   3647 
   3648   Args:
   3649     filename: the name of the header to read.
   3650     include_state: an _IncludeState instance in which the headers are inserted.
   3651     io: The io factory to use to read the file. Provided for testability.
   3652 
   3653   Returns:
   3654     True if a header was succesfully added. False otherwise.
   3655   """
   3656   headerfile = None
   3657   try:
   3658     headerfile = io.open(filename, 'r', 'utf8', 'replace')
   3659   except IOError:
   3660     return False
   3661   linenum = 0
   3662   for line in headerfile:
   3663     linenum += 1
   3664     clean_line = CleanseComments(line)
   3665     match = _RE_PATTERN_INCLUDE.search(clean_line)
   3666     if match:
   3667       include = match.group(2)
   3668       # The value formatting is cute, but not really used right now.
   3669       # What matters here is that the key is in include_state.
   3670       include_state.setdefault(include, '%s:%d' % (filename, linenum))
   3671   return True
   3672 
   3673 
   3674 def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
   3675                               io=codecs):
   3676   """Reports for missing stl includes.
   3677 
   3678   This function will output warnings to make sure you are including the headers
   3679   necessary for the stl containers and functions that you use. We only give one
   3680   reason to include a header. For example, if you use both equal_to<> and
   3681   less<> in a .h file, only one (the latter in the file) of these will be
   3682   reported as a reason to include the <functional>.
   3683 
   3684   Args:
   3685     filename: The name of the current file.
   3686     clean_lines: A CleansedLines instance containing the file.
   3687     include_state: An _IncludeState instance.
   3688     error: The function to call with any errors found.
   3689     io: The IO factory to use to read the header file. Provided for unittest
   3690         injection.
   3691   """
   3692   required = {}  # A map of header name to linenumber and the template entity.
   3693                  # Example of required: { '<functional>': (1219, 'less<>') }
   3694 
   3695   for linenum in xrange(clean_lines.NumLines()):
   3696     line = clean_lines.elided[linenum]
   3697     if not line or line[0] == '#':
   3698       continue
   3699 
   3700     # String is special -- it is a non-templatized type in STL.
   3701     matched = _RE_PATTERN_STRING.search(line)
   3702     if matched:
   3703       # Don't warn about strings in non-STL namespaces:
   3704       # (We check only the first match per line; good enough.)
   3705       prefix = line[:matched.start()]
   3706       if prefix.endswith('std::') or not prefix.endswith('::'):
   3707         required['<string>'] = (linenum, 'string')
   3708 
   3709     for pattern, template, header in _re_pattern_algorithm_header:
   3710       if pattern.search(line):
   3711         required[header] = (linenum, template)
   3712 
   3713     # The following function is just a speed up, no semantics are changed.
   3714     if not '<' in line:  # Reduces the cpu time usage by skipping lines.
   3715       continue
   3716 
   3717     for pattern, template, header in _re_pattern_templates:
   3718       if pattern.search(line):
   3719         required[header] = (linenum, template)
   3720 
   3721   # The policy is that if you #include something in foo.h you don't need to
   3722   # include it again in foo.cc. Here, we will look at possible includes.
   3723   # Let's copy the include_state so it is only messed up within this function.
   3724   include_state = include_state.copy()
   3725 
   3726   # Did we find the header for this file (if any) and succesfully load it?
   3727   header_found = False
   3728 
   3729   # Use the absolute path so that matching works properly.
   3730   abs_filename = FileInfo(filename).FullName()
   3731 
   3732   # For Emacs's flymake.
   3733   # If cpplint is invoked from Emacs's flymake, a temporary file is generated
   3734   # by flymake and that file name might end with '_flymake.cc'. In that case,
   3735   # restore original file name here so that the corresponding header file can be
   3736   # found.
   3737   # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
   3738   # instead of 'foo_flymake.h'
   3739   abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
   3740 
   3741   # include_state is modified during iteration, so we iterate over a copy of
   3742   # the keys.
   3743   header_keys = include_state.keys()
   3744   for header in header_keys:
   3745     (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
   3746     fullpath = common_path + header
   3747     if same_module and UpdateIncludeState(fullpath, include_state, io):
   3748       header_found = True
   3749 
   3750   # If we can't find the header file for a .cc, assume it's because we don't
   3751   # know where to look. In that case we'll give up as we're not sure they
   3752   # didn't include it in the .h file.
   3753   # TODO(unknown): Do a better job of finding .h files so we are confident that
   3754   # not having the .h file means there isn't one.
   3755   if filename.endswith('.cc') and not header_found:
   3756     return
   3757 
   3758   # All the lines have been processed, report the errors found.
   3759   for required_header_unstripped in required:
   3760     template = required[required_header_unstripped][1]
   3761     if required_header_unstripped.strip('<>"') not in include_state:
   3762       error(filename, required[required_header_unstripped][0],
   3763             'build/include_what_you_use', 4,
   3764             'Add #include ' + required_header_unstripped + ' for ' + template)
   3765 
   3766 
   3767 _RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
   3768 
   3769 
   3770 def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
   3771   """Check that make_pair's template arguments are deduced.
   3772 
   3773   G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are
   3774   specified explicitly, and such use isn't intended in any case.
   3775 
   3776   Args:
   3777     filename: The name of the current file.
   3778     clean_lines: A CleansedLines instance containing the file.
   3779     linenum: The number of the line to check.
   3780     error: The function to call with any errors found.
   3781   """
   3782   raw = clean_lines.raw_lines
   3783   line = raw[linenum]
   3784   match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
   3785   if match:
   3786     error(filename, linenum, 'build/explicit_make_pair',
   3787           4,  # 4 = high confidence
   3788           'For C++11-compatibility, omit template arguments from make_pair'
   3789           ' OR use pair directly OR if appropriate, construct a pair directly')
   3790 
   3791 
   3792 def ProcessLine(filename, file_extension, clean_lines, line,
   3793                 include_state, function_state, nesting_state, error,
   3794                 extra_check_functions=[]):
   3795   """Processes a single line in the file.
   3796 
   3797   Args:
   3798     filename: Filename of the file that is being processed.
   3799     file_extension: The extension (dot not included) of the file.
   3800     clean_lines: An array of strings, each representing a line of the file,
   3801                  with comments stripped.
   3802     line: Number of line being processed.
   3803     include_state: An _IncludeState instance in which the headers are inserted.
   3804     function_state: A _FunctionState instance which counts function lines, etc.
   3805     nesting_state: A _NestingState instance which maintains information about
   3806                    the current stack of nested blocks being parsed.
   3807     error: A callable to which errors are reported, which takes 4 arguments:
   3808            filename, line number, error level, and message
   3809     extra_check_functions: An array of additional check functions that will be
   3810                            run on each source line. Each function takes 4
   3811                            arguments: filename, clean_lines, line, error
   3812   """
   3813   raw_lines = clean_lines.raw_lines
   3814   ParseNolintSuppressions(filename, raw_lines[line], line, error)
   3815   nesting_state.Update(filename, clean_lines, line, error)
   3816   if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM:
   3817     return
   3818   CheckForFunctionLengths(filename, clean_lines, line, function_state, error)
   3819   CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error)
   3820   CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error)
   3821   CheckLanguage(filename, clean_lines, line, file_extension, include_state,
   3822                 error)
   3823   CheckForNonStandardConstructs(filename, clean_lines, line,
   3824                                 nesting_state, error)
   3825   CheckPosixThreading(filename, clean_lines, line, error)
   3826   CheckInvalidIncrement(filename, clean_lines, line, error)
   3827   CheckMakePairUsesDeduction(filename, clean_lines, line, error)
   3828   for check_fn in extra_check_functions:
   3829     check_fn(filename, clean_lines, line, error)
   3830 
   3831 def ProcessFileData(filename, file_extension, lines, error,
   3832                     extra_check_functions=[]):
   3833   """Performs lint checks and reports any errors to the given error function.
   3834 
   3835   Args:
   3836     filename: Filename of the file that is being processed.
   3837     file_extension: The extension (dot not included) of the file.
   3838     lines: An array of strings, each representing a line of the file, with the
   3839            last element being empty if the file is terminated with a newline.
   3840     error: A callable to which errors are reported, which takes 4 arguments:
   3841            filename, line number, error level, and message
   3842     extra_check_functions: An array of additional check functions that will be
   3843                            run on each source line. Each function takes 4
   3844                            arguments: filename, clean_lines, line, error
   3845   """
   3846   lines = (['// marker so line numbers and indices both start at 1'] + lines +
   3847            ['// marker so line numbers end in a known way'])
   3848 
   3849   include_state = _IncludeState()
   3850   function_state = _FunctionState()
   3851   nesting_state = _NestingState()
   3852 
   3853   ResetNolintSuppressions()
   3854 
   3855   CheckForCopyright(filename, lines, error)
   3856 
   3857   if file_extension == 'h':
   3858     CheckForHeaderGuard(filename, lines, error)
   3859 
   3860   RemoveMultiLineComments(filename, lines, error)
   3861   clean_lines = CleansedLines(lines)
   3862   for line in xrange(clean_lines.NumLines()):
   3863     ProcessLine(filename, file_extension, clean_lines, line,
   3864                 include_state, function_state, nesting_state, error,
   3865                 extra_check_functions)
   3866   nesting_state.CheckClassFinished(filename, error)
   3867 
   3868   CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error)
   3869 
   3870   # We check here rather than inside ProcessLine so that we see raw
   3871   # lines rather than "cleaned" lines.
   3872   CheckForUnicodeReplacementCharacters(filename, lines, error)
   3873 
   3874   CheckForNewlineAtEOF(filename, lines, error)
   3875 
   3876 def ProcessFile(filename, vlevel, extra_check_functions=[]):
   3877   """Does google-lint on a single file.
   3878 
   3879   Args:
   3880     filename: The name of the file to parse.
   3881 
   3882     vlevel: The level of errors to report.  Every error of confidence
   3883     >= verbose_level will be reported.  0 is a good default.
   3884 
   3885     extra_check_functions: An array of additional check functions that will be
   3886                            run on each source line. Each function takes 4
   3887                            arguments: filename, clean_lines, line, error
   3888   """
   3889 
   3890   _SetVerboseLevel(vlevel)
   3891 
   3892   try:
   3893     # Support the UNIX convention of using "-" for stdin.  Note that
   3894     # we are not opening the file with universal newline support
   3895     # (which codecs doesn't support anyway), so the resulting lines do
   3896     # contain trailing '\r' characters if we are reading a file that
   3897     # has CRLF endings.
   3898     # If after the split a trailing '\r' is present, it is removed
   3899     # below. If it is not expected to be present (i.e. os.linesep !=
   3900     # '\r\n' as in Windows), a warning is issued below if this file
   3901     # is processed.
   3902 
   3903     if filename == '-':
   3904       lines = codecs.StreamReaderWriter(sys.stdin,
   3905                                         codecs.getreader('utf8'),
   3906                                         codecs.getwriter('utf8'),
   3907                                         'replace').read().split('\n')
   3908     else:
   3909       lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
   3910 
   3911     carriage_return_found = False
   3912     # Remove trailing '\r'.
   3913     for linenum in range(len(lines)):
   3914       if lines[linenum].endswith('\r'):
   3915         lines[linenum] = lines[linenum].rstrip('\r')
   3916         carriage_return_found = True
   3917 
   3918   except IOError:
   3919     sys.stderr.write(
   3920         "Skipping input '%s': Can't open for reading\n" % filename)
   3921     return
   3922 
   3923   # Note, if no dot is found, this will give the entire filename as the ext.
   3924   file_extension = filename[filename.rfind('.') + 1:]
   3925 
   3926   # When reading from stdin, the extension is unknown, so no cpplint tests
   3927   # should rely on the extension.
   3928   if (filename != '-' and file_extension != 'cc' and file_extension != 'h'
   3929       and file_extension != 'cpp'):
   3930     sys.stderr.write('Ignoring %s; not a .cc or .h file\n' % filename)
   3931   else:
   3932     ProcessFileData(filename, file_extension, lines, Error,
   3933                     extra_check_functions)
   3934     if carriage_return_found and os.linesep != '\r\n':
   3935       # Use 0 for linenum since outputting only one error for potentially
   3936       # several lines.
   3937       Error(filename, 0, 'whitespace/newline', 1,
   3938             'One or more unexpected \\r (^M) found;'
   3939             'better to use only a \\n')
   3940 
   3941   sys.stderr.write('Done processing %s\n' % filename)
   3942 
   3943 
   3944 def PrintUsage(message):
   3945   """Prints a brief usage string and exits, optionally with an error message.
   3946 
   3947   Args:
   3948     message: The optional error message.
   3949   """
   3950   sys.stderr.write(_USAGE)
   3951   if message:
   3952     sys.exit('\nFATAL ERROR: ' + message)
   3953   else:
   3954     sys.exit(1)
   3955 
   3956 
   3957 def PrintCategories():
   3958   """Prints a list of all the error-categories used by error messages.
   3959 
   3960   These are the categories used to filter messages via --filter.
   3961   """
   3962   sys.stderr.write(''.join('  %s\n' % cat for cat in _ERROR_CATEGORIES))
   3963   sys.exit(0)
   3964 
   3965 
   3966 def ParseArguments(args):
   3967   """Parses the command line arguments.
   3968 
   3969   This may set the output format and verbosity level as side-effects.
   3970 
   3971   Args:
   3972     args: The command line arguments:
   3973 
   3974   Returns:
   3975     The list of filenames to lint.
   3976   """
   3977   try:
   3978     (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=',
   3979                                                  'stdout', # TODO(enh): added --stdout
   3980                                                  'counting=',
   3981                                                  'filter=',
   3982                                                  'root='])
   3983   except getopt.GetoptError:
   3984     PrintUsage('Invalid arguments.')
   3985 
   3986   verbosity = _VerboseLevel()
   3987   output_format = _OutputFormat()
   3988   output_stream = sys.stderr # TODO(enh): added --stdout
   3989   filters = ''
   3990   counting_style = ''
   3991 
   3992   for (opt, val) in opts:
   3993     if opt == '--help':
   3994       PrintUsage(None)
   3995     elif opt == '--stdout': # TODO(enh): added --stdout
   3996       output_stream = sys.stdout # TODO(enh): added --stdout
   3997     elif opt == '--output':
   3998       if not val in ('emacs', 'vs7', 'eclipse'):
   3999         PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.')
   4000       output_format = val
   4001     elif opt == '--verbose':
   4002       verbosity = int(val)
   4003     elif opt == '--filter':
   4004       filters = val
   4005       if not filters:
   4006         PrintCategories()
   4007     elif opt == '--counting':
   4008       if val not in ('total', 'toplevel', 'detailed'):
   4009         PrintUsage('Valid counting options are total, toplevel, and detailed')
   4010       counting_style = val
   4011     elif opt == '--root':
   4012       global _root
   4013       _root = val
   4014 
   4015   if not filenames:
   4016     PrintUsage('No files were specified.')
   4017 
   4018   _SetOutputFormat(output_format)
   4019   _SetVerboseLevel(verbosity)
   4020   _SetFilters(filters)
   4021   _SetCountingStyle(counting_style)
   4022   sys.stderr = output_stream # TODO(enh): added --stdout
   4023 
   4024   return filenames
   4025 
   4026 
   4027 def main():
   4028   filenames = ParseArguments(sys.argv[1:])
   4029 
   4030   # Change stderr to write with replacement characters so we don't die
   4031   # if we try to print something containing non-ASCII characters.
   4032   sys.stderr = codecs.StreamReaderWriter(sys.stderr,
   4033                                          codecs.getreader('utf8'),
   4034                                          codecs.getwriter('utf8'),
   4035                                          'replace')
   4036 
   4037   _cpplint_state.ResetErrorCounts()
   4038   for filename in filenames:
   4039     ProcessFile(filename, _cpplint_state.verbose_level)
   4040   _cpplint_state.PrintErrorCounts()
   4041 
   4042   sys.exit(_cpplint_state.error_count > 0)
   4043 
   4044 
   4045 if __name__ == '__main__':
   4046   main()
   4047