1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (C) 2009, 2010, 2012 Google Inc. All rights reserved. 4 # Copyright (C) 2009 Torch Mobile Inc. 5 # Copyright (C) 2009 Apple Inc. All rights reserved. 6 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org) 7 # 8 # Redistribution and use in source and binary forms, with or without 9 # modification, are permitted provided that the following conditions are 10 # met: 11 # 12 # * Redistributions of source code must retain the above copyright 13 # notice, this list of conditions and the following disclaimer. 14 # * Redistributions in binary form must reproduce the above 15 # copyright notice, this list of conditions and the following disclaimer 16 # in the documentation and/or other materials provided with the 17 # distribution. 18 # * Neither the name of Google Inc. nor the names of its 19 # contributors may be used to endorse or promote products derived from 20 # this software without specific prior written permission. 21 # 22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 34 # This is the modified version of Google's cpplint. The original code is 35 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py 36 37 """Support for check-webkit-style.""" 38 39 import codecs 40 import math # for log 41 import os 42 import os.path 43 import re 44 import sre_compile 45 import string 46 import sys 47 import unicodedata 48 49 from webkitpy.common.memoized import memoized 50 51 # The key to use to provide a class to fake loading a header file. 52 INCLUDE_IO_INJECTION_KEY = 'include_header_io' 53 54 # Headers that we consider STL headers. 55 _STL_HEADERS = frozenset([ 56 'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception', 57 'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set', 58 'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h', 59 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack', 60 'stl_alloc.h', 'stl_relops.h', 'type_traits.h', 61 'utility', 'vector', 'vector.h', 62 ]) 63 64 65 # Non-STL C++ system headers. 66 _CPP_HEADERS = frozenset([ 67 'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype', 68 'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath', 69 'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef', 70 'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype', 71 'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream', 72 'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip', 73 'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h', 74 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h', 75 'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h', 76 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h', 77 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept', 78 'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string', 79 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray', 80 ]) 81 82 83 # Assertion macros. These are defined in base/logging.h and 84 # testing/base/gunit.h. Note that the _M versions need to come first 85 # for substring matching to work. 86 _CHECK_MACROS = [ 87 'DCHECK', 'CHECK', 88 'EXPECT_TRUE_M', 'EXPECT_TRUE', 89 'ASSERT_TRUE_M', 'ASSERT_TRUE', 90 'EXPECT_FALSE_M', 'EXPECT_FALSE', 91 'ASSERT_FALSE_M', 'ASSERT_FALSE', 92 ] 93 94 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE 95 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) 96 97 for op, replacement in [('==', 'EQ'), ('!=', 'NE'), 98 ('>=', 'GE'), ('>', 'GT'), 99 ('<=', 'LE'), ('<', 'LT')]: 100 _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement 101 _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement 102 _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement 103 _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement 104 _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement 105 _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement 106 107 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), 108 ('>=', 'LT'), ('>', 'LE'), 109 ('<=', 'GT'), ('<', 'GE')]: 110 _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement 111 _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement 112 _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement 113 _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement 114 115 116 # These constants define types of headers for use with 117 # _IncludeState.check_next_include_order(). 118 _CONFIG_HEADER = 0 119 _PRIMARY_HEADER = 1 120 _OTHER_HEADER = 2 121 _MOC_HEADER = 3 122 123 124 # A dictionary of items customize behavior for unit test. For example, 125 # INCLUDE_IO_INJECTION_KEY allows providing a custom io class which allows 126 # for faking a header file. 127 _unit_test_config = {} 128 129 130 # The regexp compilation caching is inlined in all regexp functions for 131 # performance reasons; factoring it out into a separate function turns out 132 # to be noticeably expensive. 133 _regexp_compile_cache = {} 134 135 136 def match(pattern, s): 137 """Matches the string with the pattern, caching the compiled regexp.""" 138 if not pattern in _regexp_compile_cache: 139 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 140 return _regexp_compile_cache[pattern].match(s) 141 142 143 def search(pattern, s): 144 """Searches the string for the pattern, caching the compiled regexp.""" 145 if not pattern in _regexp_compile_cache: 146 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 147 return _regexp_compile_cache[pattern].search(s) 148 149 150 def sub(pattern, replacement, s): 151 """Substitutes occurrences of a pattern, caching the compiled regexp.""" 152 if not pattern in _regexp_compile_cache: 153 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 154 return _regexp_compile_cache[pattern].sub(replacement, s) 155 156 157 def subn(pattern, replacement, s): 158 """Substitutes occurrences of a pattern, caching the compiled regexp.""" 159 if not pattern in _regexp_compile_cache: 160 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 161 return _regexp_compile_cache[pattern].subn(replacement, s) 162 163 164 def iteratively_replace_matches_with_char(pattern, char_replacement, s): 165 """Returns the string with replacement done. 166 167 Every character in the match is replaced with char. 168 Due to the iterative nature, pattern should not match char or 169 there will be an infinite loop. 170 171 Example: 172 pattern = r'<[^>]>' # template parameters 173 char_replacement = '_' 174 s = 'A<B<C, D>>' 175 Returns 'A_________' 176 177 Args: 178 pattern: The regex to match. 179 char_replacement: The character to put in place of every 180 character of the match. 181 s: The string on which to do the replacements. 182 183 Returns: 184 True, if the given line is blank. 185 """ 186 while True: 187 matched = search(pattern, s) 188 if not matched: 189 return s 190 start_match_index = matched.start(0) 191 end_match_index = matched.end(0) 192 match_length = end_match_index - start_match_index 193 s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:] 194 195 196 def _find_in_lines(regex, lines, start_position, not_found_position): 197 """Does a find starting at start position and going forward until 198 a match is found. 199 200 Returns the position where the regex started. 201 """ 202 current_row = start_position.row 203 204 # Start with the given row and trim off everything before what should be matched. 205 current_line = lines[start_position.row][start_position.column:] 206 starting_offset = start_position.column 207 while True: 208 found_match = search(regex, current_line) 209 if found_match: 210 return Position(current_row, starting_offset + found_match.start()) 211 212 # A match was not found so continue forward. 213 current_row += 1 214 starting_offset = 0 215 if current_row >= len(lines): 216 return not_found_position 217 current_line = lines[current_row] 218 219 def _rfind_in_lines(regex, lines, start_position, not_found_position): 220 """Does a reverse find starting at start position and going backwards until 221 a match is found. 222 223 Returns the position where the regex ended. 224 """ 225 # Put the regex in a group and proceed it with a greedy expression that 226 # matches anything to ensure that we get the last possible match in a line. 227 last_in_line_regex = r'.*(' + regex + ')' 228 current_row = start_position.row 229 230 # Start with the given row and trim off everything past what may be matched. 231 current_line = lines[start_position.row][:start_position.column] 232 while True: 233 found_match = match(last_in_line_regex, current_line) 234 if found_match: 235 return Position(current_row, found_match.end(1)) 236 237 # A match was not found so continue backward. 238 current_row -= 1 239 if current_row < 0: 240 return not_found_position 241 current_line = lines[current_row] 242 243 244 def _convert_to_lower_with_underscores(text): 245 """Converts all text strings in camelCase or PascalCase to lowers with underscores.""" 246 247 # First add underscores before any capital letter followed by a lower case letter 248 # as long as it is in a word. 249 # (This put an underscore before Password but not P and A in WPAPassword). 250 text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text) 251 252 # Next add underscores before capitals at the end of words if it was 253 # preceeded by lower case letter or number. 254 # (This puts an underscore before A in isA but not A in CBA). 255 text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text) 256 257 # Next add underscores when you have a captial letter which is followed by a capital letter 258 # but is not proceeded by one. (This puts an underscore before A in 'WordADay'). 259 text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text) 260 261 return text.lower() 262 263 264 265 def _create_acronym(text): 266 """Creates an acronym for the given text.""" 267 # Removes all lower case letters except those starting words. 268 text = sub(r'(?<!\b)[a-z]', '', text) 269 return text.upper() 270 271 272 def up_to_unmatched_closing_paren(s): 273 """Splits a string into two parts up to first unmatched ')'. 274 275 Args: 276 s: a string which is a substring of line after '(' 277 (e.g., "a == (b + c))"). 278 279 Returns: 280 A pair of strings (prefix before first unmatched ')', 281 remainder of s after first unmatched ')'), e.g., 282 up_to_unmatched_closing_paren("a == (b + c)) { ") 283 returns "a == (b + c)", " {". 284 Returns None, None if there is no unmatched ')' 285 286 """ 287 i = 1 288 for pos, c in enumerate(s): 289 if c == '(': 290 i += 1 291 elif c == ')': 292 i -= 1 293 if i == 0: 294 return s[:pos], s[pos + 1:] 295 return None, None 296 297 class _IncludeState(dict): 298 """Tracks line numbers for includes, and the order in which includes appear. 299 300 As a dict, an _IncludeState object serves as a mapping between include 301 filename and line number on which that file was included. 302 303 Call check_next_include_order() once for each header in the file, passing 304 in the type constants defined above. Calls in an illegal order will 305 raise an _IncludeError with an appropriate error message. 306 307 """ 308 # self._section will move monotonically through this set. If it ever 309 # needs to move backwards, check_next_include_order will raise an error. 310 _INITIAL_SECTION = 0 311 _CONFIG_SECTION = 1 312 _PRIMARY_SECTION = 2 313 _OTHER_SECTION = 3 314 315 _TYPE_NAMES = { 316 _CONFIG_HEADER: 'WebCore config.h', 317 _PRIMARY_HEADER: 'header this file implements', 318 _OTHER_HEADER: 'other header', 319 _MOC_HEADER: 'moc file', 320 } 321 _SECTION_NAMES = { 322 _INITIAL_SECTION: "... nothing.", 323 _CONFIG_SECTION: "WebCore config.h.", 324 _PRIMARY_SECTION: 'a header this file implements.', 325 _OTHER_SECTION: 'other header.', 326 } 327 328 def __init__(self): 329 dict.__init__(self) 330 self._section = self._INITIAL_SECTION 331 self._visited_primary_section = False 332 self.header_types = dict(); 333 334 def visited_primary_section(self): 335 return self._visited_primary_section 336 337 def check_next_include_order(self, header_type, file_is_header, primary_header_exists): 338 """Returns a non-empty error message if the next header is out of order. 339 340 This function also updates the internal state to be ready to check 341 the next include. 342 343 Args: 344 header_type: One of the _XXX_HEADER constants defined above. 345 file_is_header: Whether the file that owns this _IncludeState is itself a header 346 347 Returns: 348 The empty string if the header is in the right order, or an 349 error message describing what's wrong. 350 351 """ 352 if header_type == _CONFIG_HEADER and file_is_header: 353 return 'Header file should not contain WebCore config.h.' 354 if header_type == _PRIMARY_HEADER and file_is_header: 355 return 'Header file should not contain itself.' 356 if header_type == _MOC_HEADER: 357 return '' 358 359 error_message = '' 360 if self._section != self._OTHER_SECTION: 361 before_error_message = ('Found %s before %s' % 362 (self._TYPE_NAMES[header_type], 363 self._SECTION_NAMES[self._section + 1])) 364 after_error_message = ('Found %s after %s' % 365 (self._TYPE_NAMES[header_type], 366 self._SECTION_NAMES[self._section])) 367 368 if header_type == _CONFIG_HEADER: 369 if self._section >= self._CONFIG_SECTION: 370 error_message = after_error_message 371 self._section = self._CONFIG_SECTION 372 elif header_type == _PRIMARY_HEADER: 373 if self._section >= self._PRIMARY_SECTION: 374 error_message = after_error_message 375 elif self._section < self._CONFIG_SECTION: 376 error_message = before_error_message 377 self._section = self._PRIMARY_SECTION 378 self._visited_primary_section = True 379 else: 380 assert header_type == _OTHER_HEADER 381 if not file_is_header and self._section < self._PRIMARY_SECTION: 382 if primary_header_exists: 383 error_message = before_error_message 384 self._section = self._OTHER_SECTION 385 386 return error_message 387 388 389 class Position(object): 390 """Holds the position of something.""" 391 def __init__(self, row, column): 392 self.row = row 393 self.column = column 394 395 def __str__(self): 396 return '(%s, %s)' % (self.row, self.column) 397 398 def __cmp__(self, other): 399 return self.row.__cmp__(other.row) or self.column.__cmp__(other.column) 400 401 402 class Parameter(object): 403 """Information about one function parameter.""" 404 def __init__(self, parameter, parameter_name_index, row): 405 self.type = parameter[:parameter_name_index].strip() 406 # Remove any initializers from the parameter name (e.g. int i = 5). 407 self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip() 408 self.row = row 409 410 @memoized 411 def lower_with_underscores_name(self): 412 """Returns the parameter name in the lower with underscores format.""" 413 return _convert_to_lower_with_underscores(self.name) 414 415 416 class SingleLineView(object): 417 """Converts multiple lines into a single line (with line breaks replaced by a 418 space) to allow for easier searching.""" 419 def __init__(self, lines, start_position, end_position): 420 """Create a SingleLineView instance. 421 422 Args: 423 lines: a list of multiple lines to combine into a single line. 424 start_position: offset within lines of where to start the single line. 425 end_position: just after where to end (like a slice operation). 426 """ 427 # Get the rows of interest. 428 trimmed_lines = lines[start_position.row:end_position.row + 1] 429 430 # Remove the columns on the last line that aren't included. 431 trimmed_lines[-1] = trimmed_lines[-1][:end_position.column] 432 433 # Remove the columns on the first line that aren't included. 434 trimmed_lines[0] = trimmed_lines[0][start_position.column:] 435 436 # Create a single line with all of the parameters. 437 self.single_line = ' '.join(trimmed_lines) 438 439 # Keep the row lengths, so we can calculate the original row number 440 # given a column in the single line (adding 1 due to the space added 441 # during the join). 442 self._row_lengths = [len(line) + 1 for line in trimmed_lines] 443 self._starting_row = start_position.row 444 445 def convert_column_to_row(self, single_line_column_number): 446 """Convert the column number from the single line into the original 447 line number. 448 449 Special cases: 450 * Columns in the added spaces are considered part of the previous line. 451 * Columns beyond the end of the line are consider part the last line 452 in the view.""" 453 total_columns = 0 454 row_offset = 0 455 while row_offset < len(self._row_lengths) - 1 and single_line_column_number >= total_columns + self._row_lengths[row_offset]: 456 total_columns += self._row_lengths[row_offset] 457 row_offset += 1 458 return self._starting_row + row_offset 459 460 461 def create_skeleton_parameters(all_parameters): 462 """Converts a parameter list to a skeleton version. 463 464 The skeleton only has one word for the parameter name, one word for the type, 465 and commas after each parameter and only there. Everything in the skeleton 466 remains in the same columns as the original.""" 467 all_simplifications = ( 468 # Remove template parameters, function declaration parameters, etc. 469 r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})', 470 # Remove all initializers. 471 r'=[^,]*', 472 # Remove :: and everything before it. 473 r'[^,]*::', 474 # Remove modifiers like &, *. 475 r'[&*]', 476 # Remove const modifiers. 477 r'\bconst\s+(?=[A-Za-z])', 478 # Remove numerical modifiers like long. 479 r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)') 480 481 skeleton_parameters = all_parameters 482 for simplification in all_simplifications: 483 skeleton_parameters = iteratively_replace_matches_with_char(simplification, ' ', skeleton_parameters) 484 # If there are any parameters, then add a , after the last one to 485 # make a regular pattern of a , following every parameter. 486 if skeleton_parameters.strip(): 487 skeleton_parameters += ',' 488 return skeleton_parameters 489 490 491 def find_parameter_name_index(skeleton_parameter): 492 """Determines where the parametere name starts given the skeleton parameter.""" 493 # The first space from the right in the simplified parameter is where the parameter 494 # name starts unless the first space is before any content in the simplified parameter. 495 before_name_index = skeleton_parameter.rstrip().rfind(' ') 496 if before_name_index != -1 and skeleton_parameter[:before_name_index].strip(): 497 return before_name_index + 1 498 return len(skeleton_parameter) 499 500 501 def parameter_list(elided_lines, start_position, end_position): 502 """Generator for a function's parameters.""" 503 # Create new positions that omit the outer parenthesis of the parameters. 504 start_position = Position(row=start_position.row, column=start_position.column + 1) 505 end_position = Position(row=end_position.row, column=end_position.column - 1) 506 single_line_view = SingleLineView(elided_lines, start_position, end_position) 507 skeleton_parameters = create_skeleton_parameters(single_line_view.single_line) 508 end_index = -1 509 510 while True: 511 # Find the end of the next parameter. 512 start_index = end_index + 1 513 end_index = skeleton_parameters.find(',', start_index) 514 515 # No comma means that all parameters have been parsed. 516 if end_index == -1: 517 return 518 row = single_line_view.convert_column_to_row(end_index) 519 520 # Parse the parameter into a type and parameter name. 521 skeleton_parameter = skeleton_parameters[start_index:end_index] 522 name_offset = find_parameter_name_index(skeleton_parameter) 523 parameter = single_line_view.single_line[start_index:end_index] 524 yield Parameter(parameter, name_offset, row) 525 526 527 class _FunctionState(object): 528 """Tracks current function name and the number of lines in its body. 529 530 Attributes: 531 min_confidence: The minimum confidence level to use while checking style. 532 533 """ 534 535 _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. 536 _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. 537 538 def __init__(self, min_confidence): 539 self.min_confidence = min_confidence 540 self.current_function = '' 541 self.in_a_function = False 542 self.lines_in_function = 0 543 # Make sure these will not be mistaken for real positions (even when a 544 # small amount is added to them). 545 self.body_start_position = Position(-1000, 0) 546 self.end_position = Position(-1000, 0) 547 548 def begin(self, function_name, function_name_start_position, body_start_position, end_position, 549 parameter_start_position, parameter_end_position, clean_lines): 550 """Start analyzing function body. 551 552 Args: 553 function_name: The name of the function being tracked. 554 function_name_start_position: Position in elided where the function name starts. 555 body_start_position: Position in elided of the { or the ; for a prototype. 556 end_position: Position in elided just after the final } (or ; is. 557 parameter_start_position: Position in elided of the '(' for the parameters. 558 parameter_end_position: Position in elided just after the ')' for the parameters. 559 clean_lines: A CleansedLines instance containing the file. 560 """ 561 self.in_a_function = True 562 self.lines_in_function = -1 # Don't count the open brace line. 563 self.current_function = function_name 564 self.function_name_start_position = function_name_start_position 565 self.body_start_position = body_start_position 566 self.end_position = end_position 567 self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';' 568 self.parameter_start_position = parameter_start_position 569 self.parameter_end_position = parameter_end_position 570 self.is_pure = False 571 if self.is_declaration: 572 characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line 573 self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters)) 574 self._clean_lines = clean_lines 575 self._parameter_list = None 576 577 def modifiers_and_return_type(self): 578 """Returns the modifiers and the return type.""" 579 # Go backwards from where the function name is until we encounter one of several things: 580 # ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0) 581 elided = self._clean_lines.elided 582 start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)', 583 elided, self.parameter_start_position, Position(0, 0)) 584 return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip() 585 586 def parameter_list(self): 587 if not self._parameter_list: 588 # Store the final result as a tuple since that is immutable. 589 self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position)) 590 591 return self._parameter_list 592 593 def count(self, line_number): 594 """Count line in current function body.""" 595 if self.in_a_function and line_number >= self.body_start_position.row: 596 self.lines_in_function += 1 597 598 def check(self, error, line_number): 599 """Report if too many lines in function body. 600 601 Args: 602 error: The function to call with any errors found. 603 line_number: The number of the line to check. 604 """ 605 if match(r'T(EST|est)', self.current_function): 606 base_trigger = self._TEST_TRIGGER 607 else: 608 base_trigger = self._NORMAL_TRIGGER 609 trigger = base_trigger * 2 ** self.min_confidence 610 611 if self.lines_in_function > trigger: 612 error_level = int(math.log(self.lines_in_function / base_trigger, 2)) 613 # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... 614 if error_level > 5: 615 error_level = 5 616 error(line_number, 'readability/fn_size', error_level, 617 'Small and focused functions are preferred:' 618 ' %s has %d non-comment lines' 619 ' (error triggered by exceeding %d lines).' % ( 620 self.current_function, self.lines_in_function, trigger)) 621 622 def end(self): 623 """Stop analyzing function body.""" 624 self.in_a_function = False 625 626 627 class _IncludeError(Exception): 628 """Indicates a problem with the include order in a file.""" 629 pass 630 631 632 class FileInfo: 633 """Provides utility functions for filenames. 634 635 FileInfo provides easy access to the components of a file's path 636 relative to the project root. 637 """ 638 639 def __init__(self, filename): 640 self._filename = filename 641 642 def full_name(self): 643 """Make Windows paths like Unix.""" 644 return os.path.abspath(self._filename).replace('\\', '/') 645 646 def repository_name(self): 647 """Full name after removing the local path to the repository. 648 649 If we have a real absolute path name here we can try to do something smart: 650 detecting the root of the checkout and truncating /path/to/checkout from 651 the name so that we get header guards that don't include things like 652 "C:\Documents and Settings\..." or "/home/username/..." in them and thus 653 people on different computers who have checked the source out to different 654 locations won't see bogus errors. 655 """ 656 fullname = self.full_name() 657 658 if os.path.exists(fullname): 659 project_dir = os.path.dirname(fullname) 660 661 if os.path.exists(os.path.join(project_dir, ".svn")): 662 # If there's a .svn file in the current directory, we 663 # recursively look up the directory tree for the top 664 # of the SVN checkout 665 root_dir = project_dir 666 one_up_dir = os.path.dirname(root_dir) 667 while os.path.exists(os.path.join(one_up_dir, ".svn")): 668 root_dir = os.path.dirname(root_dir) 669 one_up_dir = os.path.dirname(one_up_dir) 670 671 prefix = os.path.commonprefix([root_dir, project_dir]) 672 return fullname[len(prefix) + 1:] 673 674 # Not SVN? Try to find a git top level directory by 675 # searching up from the current path. 676 root_dir = os.path.dirname(fullname) 677 while (root_dir != os.path.dirname(root_dir) 678 and not os.path.exists(os.path.join(root_dir, ".git"))): 679 root_dir = os.path.dirname(root_dir) 680 if os.path.exists(os.path.join(root_dir, ".git")): 681 prefix = os.path.commonprefix([root_dir, project_dir]) 682 return fullname[len(prefix) + 1:] 683 684 # Don't know what to do; header guard warnings may be wrong... 685 return fullname 686 687 def split(self): 688 """Splits the file into the directory, basename, and extension. 689 690 For 'chrome/browser/browser.cpp', Split() would 691 return ('chrome/browser', 'browser', '.cpp') 692 693 Returns: 694 A tuple of (directory, basename, extension). 695 """ 696 697 googlename = self.repository_name() 698 project, rest = os.path.split(googlename) 699 return (project,) + os.path.splitext(rest) 700 701 def base_name(self): 702 """File base name - text after the final slash, before the final period.""" 703 return self.split()[1] 704 705 def extension(self): 706 """File extension - text following the final period.""" 707 return self.split()[2] 708 709 def no_extension(self): 710 """File has no source file extension.""" 711 return '/'.join(self.split()[0:2]) 712 713 def is_source(self): 714 """File has a source file extension.""" 715 return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx') 716 717 718 # Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard. 719 _RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( 720 r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') 721 # Matches strings. Escape codes should already be removed by ESCAPES. 722 _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"') 723 # Matches characters. Escape codes should already be removed by ESCAPES. 724 _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'") 725 # Matches multi-line C++ comments. 726 # This RE is a little bit more complicated than one might expect, because we 727 # have to take care of space removals tools so we can handle comments inside 728 # statements better. 729 # The current rule is: We only clear spaces from both sides when we're at the 730 # end of the line. Otherwise, we try to remove spaces from the right side, 731 # if this doesn't work we try on left side but only if there's a non-character 732 # on the right. 733 _RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( 734 r"""(\s*/\*.*\*/\s*$| 735 /\*.*\*/\s+| 736 \s+/\*.*\*/(?=\W)| 737 /\*.*\*/)""", re.VERBOSE) 738 739 740 def is_cpp_string(line): 741 """Does line terminate so, that the next symbol is in string constant. 742 743 This function does not consider single-line nor multi-line comments. 744 745 Args: 746 line: is a partial line of code starting from the 0..n. 747 748 Returns: 749 True, if next character appended to 'line' is inside a 750 string constant. 751 """ 752 753 line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" 754 return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 755 756 757 def find_next_multi_line_comment_start(lines, line_index): 758 """Find the beginning marker for a multiline comment.""" 759 while line_index < len(lines): 760 if lines[line_index].strip().startswith('/*'): 761 # Only return this marker if the comment goes beyond this line 762 if lines[line_index].strip().find('*/', 2) < 0: 763 return line_index 764 line_index += 1 765 return len(lines) 766 767 768 def find_next_multi_line_comment_end(lines, line_index): 769 """We are inside a comment, find the end marker.""" 770 while line_index < len(lines): 771 if lines[line_index].strip().endswith('*/'): 772 return line_index 773 line_index += 1 774 return len(lines) 775 776 777 def remove_multi_line_comments_from_range(lines, begin, end): 778 """Clears a range of lines for multi-line comments.""" 779 # Having // dummy comments makes the lines non-empty, so we will not get 780 # unnecessary blank line warnings later in the code. 781 for i in range(begin, end): 782 lines[i] = '// dummy' 783 784 785 def remove_multi_line_comments(lines, error): 786 """Removes multiline (c-style) comments from lines.""" 787 line_index = 0 788 while line_index < len(lines): 789 line_index_begin = find_next_multi_line_comment_start(lines, line_index) 790 if line_index_begin >= len(lines): 791 return 792 line_index_end = find_next_multi_line_comment_end(lines, line_index_begin) 793 if line_index_end >= len(lines): 794 error(line_index_begin + 1, 'readability/multiline_comment', 5, 795 'Could not find end of multi-line comment') 796 return 797 remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1) 798 line_index = line_index_end + 1 799 800 801 def cleanse_comments(line): 802 """Removes //-comments and single-line C-style /* */ comments. 803 804 Args: 805 line: A line of C++ source. 806 807 Returns: 808 The line with single-line comments removed. 809 """ 810 comment_position = line.find('//') 811 if comment_position != -1 and not is_cpp_string(line[:comment_position]): 812 line = line[:comment_position] 813 # get rid of /* ... */ 814 return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) 815 816 817 class CleansedLines(object): 818 """Holds 3 copies of all lines with different preprocessing applied to them. 819 820 1) elided member contains lines without strings and comments, 821 2) lines member contains lines without comments, and 822 3) raw member contains all the lines without processing. 823 All these three members are of <type 'list'>, and of the same length. 824 """ 825 826 def __init__(self, lines): 827 self.elided = [] 828 self.lines = [] 829 self.raw_lines = lines 830 self._num_lines = len(lines) 831 for line_number in range(len(lines)): 832 self.lines.append(cleanse_comments(lines[line_number])) 833 elided = self.collapse_strings(lines[line_number]) 834 self.elided.append(cleanse_comments(elided)) 835 836 def num_lines(self): 837 """Returns the number of lines represented.""" 838 return self._num_lines 839 840 @staticmethod 841 def collapse_strings(elided): 842 """Collapses strings and chars on a line to simple "" or '' blocks. 843 844 We nix strings first so we're not fooled by text like '"http://"' 845 846 Args: 847 elided: The line being processed. 848 849 Returns: 850 The line with collapsed strings. 851 """ 852 if not _RE_PATTERN_INCLUDE.match(elided): 853 # Remove escaped characters first to make quote/single quote collapsing 854 # basic. Things that look like escaped characters shouldn't occur 855 # outside of strings and chars. 856 elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) 857 elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided) 858 elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided) 859 return elided 860 861 862 def close_expression(elided, position): 863 """If input points to ( or { or [, finds the position that closes it. 864 865 If elided[position.row][position.column] points to a '(' or '{' or '[', 866 finds the line_number/pos that correspond to the closing of the expression. 867 868 Args: 869 elided: A CleansedLines.elided instance containing the file. 870 position: The position of the opening item. 871 872 Returns: 873 The Position *past* the closing brace, or Position(len(elided), -1) 874 if we never find a close. Note we ignore strings and comments when matching. 875 """ 876 line = elided[position.row] 877 start_character = line[position.column] 878 if start_character == '(': 879 enclosing_character_regex = r'[\(\)]' 880 elif start_character == '[': 881 enclosing_character_regex = r'[\[\]]' 882 elif start_character == '{': 883 enclosing_character_regex = r'[\{\}]' 884 else: 885 return Position(len(elided), -1) 886 887 current_column = position.column + 1 888 line_number = position.row 889 net_open = 1 890 for line in elided[position.row:]: 891 line = line[current_column:] 892 893 # Search the current line for opening and closing characters. 894 while True: 895 next_enclosing_character = search(enclosing_character_regex, line) 896 # No more on this line. 897 if not next_enclosing_character: 898 break 899 current_column += next_enclosing_character.end(0) 900 line = line[next_enclosing_character.end(0):] 901 if next_enclosing_character.group(0) == start_character: 902 net_open += 1 903 else: 904 net_open -= 1 905 if not net_open: 906 return Position(line_number, current_column) 907 908 # Proceed to the next line. 909 line_number += 1 910 current_column = 0 911 912 # The given item was not closed. 913 return Position(len(elided), -1) 914 915 def check_for_copyright(lines, error): 916 """Logs an error if no Copyright message appears at the top of the file.""" 917 918 # We'll say it should occur by line 10. Don't forget there's a 919 # dummy line at the front. 920 for line in xrange(1, min(len(lines), 11)): 921 if re.search(r'Copyright', lines[line], re.I): 922 break 923 else: # means no copyright line was found 924 error(0, 'legal/copyright', 5, 925 'No copyright message found. ' 926 'You should have a line: "Copyright [year] <Copyright Owner>"') 927 928 929 def get_header_guard_cpp_variable(filename): 930 """Returns the CPP variable that should be used as a header guard. 931 932 Args: 933 filename: The name of a C++ header file. 934 935 Returns: 936 The CPP variable that should be used as a header guard in the 937 named file. 938 939 """ 940 941 # Restores original filename in case that style checker is invoked from Emacs's 942 # flymake. 943 filename = re.sub(r'_flymake\.h$', '.h', filename) 944 945 standard_name = sub(r'[-.\s]', '_', os.path.basename(filename)) 946 947 # Files under WTF typically have header guards that start with WTF_. 948 if '/wtf/' in filename: 949 special_name = "WTF_" + standard_name 950 else: 951 special_name = standard_name 952 return (special_name, standard_name) 953 954 955 def check_for_header_guard(filename, lines, error): 956 """Checks that the file contains a header guard. 957 958 Logs an error if no #ifndef header guard is present. For other 959 headers, checks that the full pathname is used. 960 961 Args: 962 filename: The name of the C++ header file. 963 lines: An array of strings, each representing a line of the file. 964 error: The function to call with any errors found. 965 """ 966 967 cppvar = get_header_guard_cpp_variable(filename) 968 969 ifndef = None 970 ifndef_line_number = 0 971 define = None 972 for line_number, line in enumerate(lines): 973 line_split = line.split() 974 if len(line_split) >= 2: 975 # find the first occurrence of #ifndef and #define, save arg 976 if not ifndef and line_split[0] == '#ifndef': 977 # set ifndef to the header guard presented on the #ifndef line. 978 ifndef = line_split[1] 979 ifndef_line_number = line_number 980 if not define and line_split[0] == '#define': 981 define = line_split[1] 982 if define and ifndef: 983 break 984 985 if not ifndef or not define or ifndef != define: 986 error(0, 'build/header_guard', 5, 987 'No #ifndef header guard found, suggested CPP variable is: %s' % 988 cppvar[0]) 989 return 990 991 # The guard should be File_h. 992 if ifndef not in cppvar: 993 error(ifndef_line_number, 'build/header_guard', 5, 994 '#ifndef header guard has wrong style, please use: %s' % cppvar[0]) 995 996 997 def check_for_unicode_replacement_characters(lines, error): 998 """Logs an error for each line containing Unicode replacement characters. 999 1000 These indicate that either the file contained invalid UTF-8 (likely) 1001 or Unicode replacement characters (which it shouldn't). Note that 1002 it's possible for this to throw off line numbering if the invalid 1003 UTF-8 occurred adjacent to a newline. 1004 1005 Args: 1006 lines: An array of strings, each representing a line of the file. 1007 error: The function to call with any errors found. 1008 """ 1009 for line_number, line in enumerate(lines): 1010 if u'\ufffd' in line: 1011 error(line_number, 'readability/utf8', 5, 1012 'Line contains invalid UTF-8 (or Unicode replacement character).') 1013 1014 1015 def check_for_new_line_at_eof(lines, error): 1016 """Logs an error if there is no newline char at the end of the file. 1017 1018 Args: 1019 lines: An array of strings, each representing a line of the file. 1020 error: The function to call with any errors found. 1021 """ 1022 1023 # The array lines() was created by adding two newlines to the 1024 # original file (go figure), then splitting on \n. 1025 # To verify that the file ends in \n, we just have to make sure the 1026 # last-but-two element of lines() exists and is empty. 1027 if len(lines) < 3 or lines[-2]: 1028 error(len(lines) - 2, 'whitespace/ending_newline', 5, 1029 'Could not find a newline character at the end of the file.') 1030 1031 1032 def check_for_multiline_comments_and_strings(clean_lines, line_number, error): 1033 """Logs an error if we see /* ... */ or "..." that extend past one line. 1034 1035 /* ... */ comments are legit inside macros, for one line. 1036 Otherwise, we prefer // comments, so it's ok to warn about the 1037 other. Likewise, it's ok for strings to extend across multiple 1038 lines, as long as a line continuation character (backslash) 1039 terminates each line. Although not currently prohibited by the C++ 1040 style guide, it's ugly and unnecessary. We don't do well with either 1041 in this lint program, so we warn about both. 1042 1043 Args: 1044 clean_lines: A CleansedLines instance containing the file. 1045 line_number: The number of the line to check. 1046 error: The function to call with any errors found. 1047 """ 1048 line = clean_lines.elided[line_number] 1049 1050 # Remove all \\ (escaped backslashes) from the line. They are OK, and the 1051 # second (escaped) slash may trigger later \" detection erroneously. 1052 line = line.replace('\\\\', '') 1053 1054 if line.count('/*') > line.count('*/'): 1055 error(line_number, 'readability/multiline_comment', 5, 1056 'Complex multi-line /*...*/-style comment found. ' 1057 'Lint may give bogus warnings. ' 1058 'Consider replacing these with //-style comments, ' 1059 'with #if 0...#endif, ' 1060 'or with more clearly structured multi-line comments.') 1061 1062 if (line.count('"') - line.count('\\"')) % 2: 1063 error(line_number, 'readability/multiline_string', 5, 1064 'Multi-line string ("...") found. This lint script doesn\'t ' 1065 'do well with such strings, and may give bogus warnings. They\'re ' 1066 'ugly and unnecessary, and you should use concatenation instead".') 1067 1068 1069 _THREADING_LIST = ( 1070 ('asctime(', 'asctime_r('), 1071 ('ctime(', 'ctime_r('), 1072 ('getgrgid(', 'getgrgid_r('), 1073 ('getgrnam(', 'getgrnam_r('), 1074 ('getlogin(', 'getlogin_r('), 1075 ('getpwnam(', 'getpwnam_r('), 1076 ('getpwuid(', 'getpwuid_r('), 1077 ('gmtime(', 'gmtime_r('), 1078 ('localtime(', 'localtime_r('), 1079 ('rand(', 'rand_r('), 1080 ('readdir(', 'readdir_r('), 1081 ('strtok(', 'strtok_r('), 1082 ('ttyname(', 'ttyname_r('), 1083 ) 1084 1085 1086 def check_posix_threading(clean_lines, line_number, error): 1087 """Checks for calls to thread-unsafe functions. 1088 1089 Much code has been originally written without consideration of 1090 multi-threading. Also, engineers are relying on their old experience; 1091 they have learned posix before threading extensions were added. These 1092 tests guide the engineers to use thread-safe functions (when using 1093 posix directly). 1094 1095 Args: 1096 clean_lines: A CleansedLines instance containing the file. 1097 line_number: The number of the line to check. 1098 error: The function to call with any errors found. 1099 """ 1100 line = clean_lines.elided[line_number] 1101 for single_thread_function, multithread_safe_function in _THREADING_LIST: 1102 index = line.find(single_thread_function) 1103 # Comparisons made explicit for clarity 1104 if index >= 0 and (index == 0 or (not line[index - 1].isalnum() 1105 and line[index - 1] not in ('_', '.', '>'))): 1106 error(line_number, 'runtime/threadsafe_fn', 2, 1107 'Consider using ' + multithread_safe_function + 1108 '...) instead of ' + single_thread_function + 1109 '...) for improved thread safety.') 1110 1111 1112 # Matches invalid increment: *count++, which moves pointer instead of 1113 # incrementing a value. 1114 _RE_PATTERN_INVALID_INCREMENT = re.compile( 1115 r'^\s*\*\w+(\+\+|--);') 1116 1117 1118 def check_invalid_increment(clean_lines, line_number, error): 1119 """Checks for invalid increment *count++. 1120 1121 For example following function: 1122 void increment_counter(int* count) { 1123 *count++; 1124 } 1125 is invalid, because it effectively does count++, moving pointer, and should 1126 be replaced with ++*count, (*count)++ or *count += 1. 1127 1128 Args: 1129 clean_lines: A CleansedLines instance containing the file. 1130 line_number: The number of the line to check. 1131 error: The function to call with any errors found. 1132 """ 1133 line = clean_lines.elided[line_number] 1134 if _RE_PATTERN_INVALID_INCREMENT.match(line): 1135 error(line_number, 'runtime/invalid_increment', 5, 1136 'Changing pointer instead of value (or unused value of operator*).') 1137 1138 1139 class _ClassInfo(object): 1140 """Stores information about a class.""" 1141 1142 def __init__(self, name, line_number): 1143 self.name = name 1144 self.line_number = line_number 1145 self.seen_open_brace = False 1146 self.is_derived = False 1147 self.virtual_method_line_number = None 1148 self.has_virtual_destructor = False 1149 self.brace_depth = 0 1150 self.unsigned_bitfields = [] 1151 self.bool_bitfields = [] 1152 1153 1154 class _ClassState(object): 1155 """Holds the current state of the parse relating to class declarations. 1156 1157 It maintains a stack of _ClassInfos representing the parser's guess 1158 as to the current nesting of class declarations. The innermost class 1159 is at the top (back) of the stack. Typically, the stack will either 1160 be empty or have exactly one entry. 1161 """ 1162 1163 def __init__(self): 1164 self.classinfo_stack = [] 1165 1166 def check_finished(self, error): 1167 """Checks that all classes have been completely parsed. 1168 1169 Call this when all lines in a file have been processed. 1170 Args: 1171 error: The function to call with any errors found. 1172 """ 1173 if self.classinfo_stack: 1174 # Note: This test can result in false positives if #ifdef constructs 1175 # get in the way of brace matching. See the testBuildClass test in 1176 # cpp_style_unittest.py for an example of this. 1177 error(self.classinfo_stack[0].line_number, 'build/class', 5, 1178 'Failed to find complete declaration of class %s' % 1179 self.classinfo_stack[0].name) 1180 1181 1182 class _FileState(object): 1183 def __init__(self, clean_lines, file_extension): 1184 self._did_inside_namespace_indent_warning = False 1185 self._clean_lines = clean_lines 1186 if file_extension in ['m', 'mm']: 1187 self._is_objective_c = True 1188 self._is_c = False 1189 elif file_extension == 'h': 1190 # In the case of header files, it is unknown if the file 1191 # is c / objective c or not, so set this value to None and then 1192 # if it is requested, use heuristics to guess the value. 1193 self._is_objective_c = None 1194 self._is_c = None 1195 elif file_extension == 'c': 1196 self._is_c = True 1197 self._is_objective_c = False 1198 else: 1199 self._is_objective_c = False 1200 self._is_c = False 1201 1202 def set_did_inside_namespace_indent_warning(self): 1203 self._did_inside_namespace_indent_warning = True 1204 1205 def did_inside_namespace_indent_warning(self): 1206 return self._did_inside_namespace_indent_warning 1207 1208 def is_objective_c(self): 1209 if self._is_objective_c is None: 1210 for line in self._clean_lines.elided: 1211 # Starting with @ or #import seem like the best indications 1212 # that we have an Objective C file. 1213 if line.startswith("@") or line.startswith("#import"): 1214 self._is_objective_c = True 1215 break 1216 else: 1217 self._is_objective_c = False 1218 return self._is_objective_c 1219 1220 def is_c(self): 1221 if self._is_c is None: 1222 for line in self._clean_lines.lines: 1223 # if extern "C" is found, then it is a good indication 1224 # that we have a C header file. 1225 if line.startswith('extern "C"'): 1226 self._is_c = True 1227 break 1228 else: 1229 self._is_c = False 1230 return self._is_c 1231 1232 def is_c_or_objective_c(self): 1233 """Return whether the file extension corresponds to C or Objective-C.""" 1234 return self.is_c() or self.is_objective_c() 1235 1236 1237 class _EnumState(object): 1238 """Maintains whether currently in an enum declaration, and checks whether 1239 enum declarations follow the style guide. 1240 """ 1241 1242 def __init__(self): 1243 self.in_enum_decl = False 1244 self.is_webidl_enum = False 1245 1246 def process_clean_line(self, line): 1247 # FIXME: The regular expressions for expr_all_uppercase and expr_enum_end only accept integers 1248 # and identifiers for the value of the enumerator, but do not accept any other constant 1249 # expressions. However, this is sufficient for now (11/27/2012). 1250 expr_all_uppercase = r'\s*[A-Z0-9_]+\s*(?:=\s*[a-zA-Z0-9]+\s*)?,?\s*$' 1251 expr_starts_lowercase = r'\s*[a-z]' 1252 expr_enum_end = r'}\s*(?:[a-zA-Z0-9]+\s*(?:=\s*[a-zA-Z0-9]+)?)?\s*;\s*' 1253 expr_enum_start = r'\s*enum(?:\s+[a-zA-Z0-9]+)?\s*\{?\s*' 1254 if self.in_enum_decl: 1255 if match(r'\s*' + expr_enum_end + r'$', line): 1256 self.in_enum_decl = False 1257 self.is_webidl_enum = False 1258 elif match(expr_all_uppercase, line): 1259 return self.is_webidl_enum 1260 elif match(expr_starts_lowercase, line): 1261 return False 1262 else: 1263 matched = match(expr_enum_start + r'$', line) 1264 if matched: 1265 self.in_enum_decl = True 1266 else: 1267 matched = match(expr_enum_start + r'(?P<members>.*)' + expr_enum_end + r'$', line) 1268 if matched: 1269 members = matched.group('members').split(',') 1270 found_invalid_member = False 1271 for member in members: 1272 if match(expr_all_uppercase, member): 1273 found_invalid_member = not self.is_webidl_enum 1274 if match(expr_starts_lowercase, member): 1275 found_invalid_member = True 1276 if found_invalid_member: 1277 self.is_webidl_enum = False 1278 return False 1279 return True 1280 return True 1281 1282 def check_for_non_standard_constructs(clean_lines, line_number, 1283 class_state, error): 1284 """Logs an error if we see certain non-ANSI constructs ignored by gcc-2. 1285 1286 Complain about several constructs which gcc-2 accepts, but which are 1287 not standard C++. Warning about these in lint is one way to ease the 1288 transition to new compilers. 1289 - put storage class first (e.g. "static const" instead of "const static"). 1290 - "%lld" instead of %qd" in printf-type functions. 1291 - "%1$d" is non-standard in printf-type functions. 1292 - "\%" is an undefined character escape sequence. 1293 - text after #endif is not allowed. 1294 - invalid inner-style forward declaration. 1295 - >? and <? operators, and their >?= and <?= cousins. 1296 - classes with virtual methods need virtual destructors (compiler warning 1297 available, but not turned on yet.) 1298 1299 Additionally, check for constructor/destructor style violations as it 1300 is very convenient to do so while checking for gcc-2 compliance. 1301 1302 Args: 1303 clean_lines: A CleansedLines instance containing the file. 1304 line_number: The number of the line to check. 1305 class_state: A _ClassState instance which maintains information about 1306 the current stack of nested class declarations being parsed. 1307 error: A callable to which errors are reported, which takes parameters: 1308 line number, error level, and message 1309 """ 1310 1311 # Remove comments from the line, but leave in strings for now. 1312 line = clean_lines.lines[line_number] 1313 1314 if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): 1315 error(line_number, 'runtime/printf_format', 3, 1316 '%q in format strings is deprecated. Use %ll instead.') 1317 1318 if search(r'printf\s*\(.*".*%\d+\$', line): 1319 error(line_number, 'runtime/printf_format', 2, 1320 '%N$ formats are unconventional. Try rewriting to avoid them.') 1321 1322 # Remove escaped backslashes before looking for undefined escapes. 1323 line = line.replace('\\\\', '') 1324 1325 if search(r'("|\').*\\(%|\[|\(|{)', line): 1326 error(line_number, 'build/printf_format', 3, 1327 '%, [, (, and { are undefined character escapes. Unescape them.') 1328 1329 # For the rest, work with both comments and strings removed. 1330 line = clean_lines.elided[line_number] 1331 1332 if search(r'\b(const|volatile|void|char|short|int|long' 1333 r'|float|double|signed|unsigned' 1334 r'|schar|u?int8|u?int16|u?int32|u?int64)' 1335 r'\s+(auto|register|static|extern|typedef)\b', 1336 line): 1337 error(line_number, 'build/storage_class', 5, 1338 'Storage class (static, extern, typedef, etc) should be first.') 1339 1340 if match(r'\s*#\s*endif\s*[^/\s]+', line): 1341 error(line_number, 'build/endif_comment', 5, 1342 'Uncommented text after #endif is non-standard. Use a comment.') 1343 1344 if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line): 1345 error(line_number, 'build/forward_decl', 5, 1346 'Inner-style forward declarations are invalid. Remove this line.') 1347 1348 if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line): 1349 error(line_number, 'build/deprecated', 3, 1350 '>? and <? (max and min) operators are non-standard and deprecated.') 1351 1352 # Track class entry and exit, and attempt to find cases within the 1353 # class declaration that don't meet the C++ style 1354 # guidelines. Tracking is very dependent on the code matching Google 1355 # style guidelines, but it seems to perform well enough in testing 1356 # to be a worthwhile addition to the checks. 1357 classinfo_stack = class_state.classinfo_stack 1358 # Look for a class declaration 1359 class_decl_match = match( 1360 r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line) 1361 if class_decl_match: 1362 classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number)) 1363 1364 # Everything else in this function uses the top of the stack if it's 1365 # not empty. 1366 if not classinfo_stack: 1367 return 1368 1369 classinfo = classinfo_stack[-1] 1370 1371 # If the opening brace hasn't been seen look for it and also 1372 # parent class declarations. 1373 if not classinfo.seen_open_brace: 1374 # If the line has a ';' in it, assume it's a forward declaration or 1375 # a single-line class declaration, which we won't process. 1376 if line.find(';') != -1: 1377 classinfo_stack.pop() 1378 return 1379 classinfo.seen_open_brace = (line.find('{') != -1) 1380 # Look for a bare ':' 1381 if search('(^|[^:]):($|[^:])', line): 1382 classinfo.is_derived = True 1383 if not classinfo.seen_open_brace: 1384 return # Everything else in this function is for after open brace 1385 1386 # The class may have been declared with namespace or classname qualifiers. 1387 # The constructor and destructor will not have those qualifiers. 1388 base_classname = classinfo.name.split('::')[-1] 1389 1390 # Look for single-argument constructors that aren't marked explicit. 1391 # Technically a valid construct, but against style. 1392 args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)' 1393 % re.escape(base_classname), 1394 line) 1395 if (args 1396 and args.group(1) != 'void' 1397 and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname), 1398 args.group(1).strip())): 1399 error(line_number, 'runtime/explicit', 5, 1400 'Single-argument constructors should be marked explicit.') 1401 1402 # Look for methods declared virtual. 1403 if search(r'\bvirtual\b', line): 1404 classinfo.virtual_method_line_number = line_number 1405 # Only look for a destructor declaration on the same line. It would 1406 # be extremely unlikely for the destructor declaration to occupy 1407 # more than one line. 1408 if search(r'~%s\s*\(' % base_classname, line): 1409 classinfo.has_virtual_destructor = True 1410 1411 # Look for class end. 1412 brace_depth = classinfo.brace_depth 1413 brace_depth = brace_depth + line.count('{') - line.count('}') 1414 if brace_depth <= 0: 1415 classinfo = classinfo_stack.pop() 1416 # Try to detect missing virtual destructor declarations. 1417 # For now, only warn if a non-derived class with virtual methods lacks 1418 # a virtual destructor. This is to make it less likely that people will 1419 # declare derived virtual destructors without declaring the base 1420 # destructor virtual. 1421 if ((classinfo.virtual_method_line_number is not None) 1422 and (not classinfo.has_virtual_destructor) 1423 and (not classinfo.is_derived)): # Only warn for base classes 1424 error(classinfo.line_number, 'runtime/virtual', 4, 1425 'The class %s probably needs a virtual destructor due to ' 1426 'having virtual method(s), one declared at line %d.' 1427 % (classinfo.name, classinfo.virtual_method_line_number)) 1428 # Look for mixed bool and unsigned bitfields. 1429 if (classinfo.bool_bitfields and classinfo.unsigned_bitfields): 1430 bool_list = ', '.join(classinfo.bool_bitfields) 1431 unsigned_list = ', '.join(classinfo.unsigned_bitfields) 1432 error(classinfo.line_number, 'runtime/bitfields', 5, 1433 'The class %s contains mixed unsigned and bool bitfields, ' 1434 'which will pack into separate words on the MSVC compiler.\n' 1435 'Bool bitfields are [%s].\nUnsigned bitfields are [%s].\n' 1436 'Consider converting bool bitfields to unsigned.' 1437 % (classinfo.name, bool_list, unsigned_list)) 1438 else: 1439 classinfo.brace_depth = brace_depth 1440 1441 well_typed_bitfield = False; 1442 # Look for bool <name> : 1 declarations. 1443 args = search(r'\bbool\s+(\S*)\s*:\s*\d+\s*;', line) 1444 if args: 1445 classinfo.bool_bitfields.append('%d: %s' % (line_number, args.group(1))) 1446 well_typed_bitfield = True; 1447 1448 # Look for unsigned <name> : n declarations. 1449 args = search(r'\bunsigned\s+(?:int\s+)?(\S+)\s*:\s*\d+\s*;', line) 1450 if args: 1451 classinfo.unsigned_bitfields.append('%d: %s' % (line_number, args.group(1))) 1452 well_typed_bitfield = True; 1453 1454 # Look for other bitfield declarations. We don't care about those in 1455 # size-matching structs. 1456 if not (well_typed_bitfield or classinfo.name.startswith('SameSizeAs') or 1457 classinfo.name.startswith('Expected')): 1458 args = match(r'\s*(\S+)\s+(\S+)\s*:\s*\d+\s*;', line) 1459 if args: 1460 error(line_number, 'runtime/bitfields', 4, 1461 'Member %s of class %s defined as a bitfield of type %s. ' 1462 'Please declare all bitfields as unsigned.' 1463 % (args.group(2), classinfo.name, args.group(1))) 1464 1465 def check_spacing_for_function_call(line, line_number, error): 1466 """Checks for the correctness of various spacing around function calls. 1467 1468 Args: 1469 line: The text of the line to check. 1470 line_number: The number of the line to check. 1471 error: The function to call with any errors found. 1472 """ 1473 1474 # Since function calls often occur inside if/for/foreach/while/switch 1475 # expressions - which have their own, more liberal conventions - we 1476 # first see if we should be looking inside such an expression for a 1477 # function call, to which we can apply more strict standards. 1478 function_call = line # if there's no control flow construct, look at whole line 1479 for pattern in (r'\bif\s*\((.*)\)\s*{', 1480 r'\bfor\s*\((.*)\)\s*{', 1481 r'\bforeach\s*\((.*)\)\s*{', 1482 r'\bwhile\s*\((.*)\)\s*[{;]', 1483 r'\bswitch\s*\((.*)\)\s*{'): 1484 matched = search(pattern, line) 1485 if matched: 1486 function_call = matched.group(1) # look inside the parens for function calls 1487 break 1488 1489 # Except in if/for/foreach/while/switch, there should never be space 1490 # immediately inside parens (eg "f( 3, 4 )"). We make an exception 1491 # for nested parens ( (a+b) + c ). Likewise, there should never be 1492 # a space before a ( when it's a function argument. I assume it's a 1493 # function argument when the char before the whitespace is legal in 1494 # a function name (alnum + _) and we're not starting a macro. Also ignore 1495 # pointers and references to arrays and functions coz they're too tricky: 1496 # we use a very simple way to recognize these: 1497 # " (something)(maybe-something)" or 1498 # " (something)(maybe-something," or 1499 # " (something)[something]" 1500 # Note that we assume the contents of [] to be short enough that 1501 # they'll never need to wrap. 1502 if ( # Ignore control structures. 1503 not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call) 1504 # Ignore pointers/references to functions. 1505 and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call) 1506 # Ignore pointers/references to arrays. 1507 and not search(r' \([^)]+\)\[[^\]]+\]', function_call)): 1508 if search(r'\w\s*\([ \t](?!\s*\\$)', function_call): # a ( used for a fn call 1509 error(line_number, 'whitespace/parens', 4, 1510 'Extra space after ( in function call') 1511 elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call): 1512 error(line_number, 'whitespace/parens', 2, 1513 'Extra space after (') 1514 if (search(r'\w\s+\(', function_call) 1515 and not match(r'\s*(#|typedef)', function_call)): 1516 error(line_number, 'whitespace/parens', 4, 1517 'Extra space before ( in function call') 1518 # If the ) is followed only by a newline or a { + newline, assume it's 1519 # part of a control statement (if/while/etc), and don't complain 1520 if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call): 1521 error(line_number, 'whitespace/parens', 2, 1522 'Extra space before )') 1523 1524 1525 def is_blank_line(line): 1526 """Returns true if the given line is blank. 1527 1528 We consider a line to be blank if the line is empty or consists of 1529 only white spaces. 1530 1531 Args: 1532 line: A line of a string. 1533 1534 Returns: 1535 True, if the given line is blank. 1536 """ 1537 return not line or line.isspace() 1538 1539 1540 def detect_functions(clean_lines, line_number, function_state, error): 1541 """Finds where functions start and end. 1542 1543 Uses a simplistic algorithm assuming other style guidelines 1544 (especially spacing) are followed. 1545 Trivial bodies are unchecked, so constructors with huge initializer lists 1546 may be missed. 1547 1548 Args: 1549 clean_lines: A CleansedLines instance containing the file. 1550 line_number: The number of the line to check. 1551 function_state: Current function name and lines in body so far. 1552 error: The function to call with any errors found. 1553 """ 1554 # Are we now past the end of a function? 1555 if function_state.end_position.row + 1 == line_number: 1556 function_state.end() 1557 1558 # If we're in a function, don't try to detect a new one. 1559 if function_state.in_a_function: 1560 return 1561 1562 lines = clean_lines.lines 1563 line = lines[line_number] 1564 raw = clean_lines.raw_lines 1565 raw_line = raw[line_number] 1566 1567 # Lines ending with a \ indicate a macro. Don't try to check them. 1568 if raw_line.endswith('\\'): 1569 return 1570 1571 regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(' # decls * & space::name( ... 1572 match_result = match(regexp, line) 1573 if not match_result: 1574 return 1575 1576 # If the name is all caps and underscores, figure it's a macro and 1577 # ignore it, unless it's TEST or TEST_F. 1578 function_name = match_result.group(1).split()[-1] 1579 if function_name != 'TEST' and function_name != 'TEST_F' and match(r'[A-Z_]+$', function_name): 1580 return 1581 1582 joined_line = '' 1583 for start_line_number in xrange(line_number, clean_lines.num_lines()): 1584 start_line = clean_lines.elided[start_line_number] 1585 joined_line += ' ' + start_line.lstrip() 1586 body_match = search(r'{|;', start_line) 1587 if body_match: 1588 body_start_position = Position(start_line_number, body_match.start(0)) 1589 1590 # Replace template constructs with _ so that no spaces remain in the function name, 1591 # while keeping the column numbers of other characters the same as "line". 1592 line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line) 1593 match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates) 1594 if not match_function: 1595 return # The '(' must have been inside of a template. 1596 1597 # Use the column numbers from the modified line to find the 1598 # function name in the original line. 1599 function = line[match_function.start(1):match_function.end(1)] 1600 function_name_start_position = Position(line_number, match_function.start(1)) 1601 1602 if match(r'TEST', function): # Handle TEST... macros 1603 parameter_regexp = search(r'(\(.*\))', joined_line) 1604 if parameter_regexp: # Ignore bad syntax 1605 function += parameter_regexp.group(1) 1606 else: 1607 function += '()' 1608 1609 parameter_start_position = Position(line_number, match_function.end(1)) 1610 parameter_end_position = close_expression(clean_lines.elided, parameter_start_position) 1611 if parameter_end_position.row == len(clean_lines.elided): 1612 # No end was found. 1613 return 1614 1615 if start_line[body_start_position.column] == ';': 1616 end_position = Position(body_start_position.row, body_start_position.column + 1) 1617 else: 1618 end_position = close_expression(clean_lines.elided, body_start_position) 1619 1620 # Check for nonsensical positions. (This happens in test cases which check code snippets.) 1621 if parameter_end_position > body_start_position: 1622 return 1623 1624 function_state.begin(function, function_name_start_position, body_start_position, end_position, 1625 parameter_start_position, parameter_end_position, clean_lines) 1626 return 1627 1628 # No body for the function (or evidence of a non-function) was found. 1629 error(line_number, 'readability/fn_size', 5, 1630 'Lint failed to find start of function body.') 1631 1632 1633 def check_for_function_lengths(clean_lines, line_number, function_state, error): 1634 """Reports for long function bodies. 1635 1636 For an overview why this is done, see: 1637 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions 1638 1639 Blank/comment lines are not counted so as to avoid encouraging the removal 1640 of vertical space and commments just to get through a lint check. 1641 NOLINT *on the last line of a function* disables this check. 1642 1643 Args: 1644 clean_lines: A CleansedLines instance containing the file. 1645 line_number: The number of the line to check. 1646 function_state: Current function name and lines in body so far. 1647 error: The function to call with any errors found. 1648 """ 1649 lines = clean_lines.lines 1650 line = lines[line_number] 1651 raw = clean_lines.raw_lines 1652 raw_line = raw[line_number] 1653 1654 if function_state.end_position.row == line_number: # last line 1655 if not search(r'\bNOLINT\b', raw_line): 1656 function_state.check(error, line_number) 1657 elif not match(r'^\s*$', line): 1658 function_state.count(line_number) # Count non-blank/non-comment lines. 1659 1660 1661 def _check_parameter_name_against_text(parameter, text, error): 1662 """Checks to see if the parameter name is contained within the text. 1663 1664 Return false if the check failed (i.e. an error was produced). 1665 """ 1666 1667 # Treat 'lower with underscores' as a canonical form because it is 1668 # case insensitive while still retaining word breaks. (This ensures that 1669 # 'elate' doesn't look like it is duplicating of 'NateLate'.) 1670 canonical_parameter_name = parameter.lower_with_underscores_name() 1671 1672 # Appends "object" to all text to catch variables that did the same (but only 1673 # do this when the parameter name is more than a single character to avoid 1674 # flagging 'b' which may be an ok variable when used in an rgba function). 1675 if len(canonical_parameter_name) > 1: 1676 text = sub(r'(\w)\b', r'\1Object', text) 1677 canonical_text = _convert_to_lower_with_underscores(text) 1678 1679 # Used to detect cases like ec for ExceptionCode. 1680 acronym = _create_acronym(text).lower() 1681 if canonical_text.find(canonical_parameter_name) != -1 or acronym.find(canonical_parameter_name) != -1: 1682 error(parameter.row, 'readability/parameter_name', 5, 1683 'The parameter name "%s" adds no information, so it should be removed.' % parameter.name) 1684 return False 1685 return True 1686 1687 1688 def check_function_definition_and_pass_ptr(type_text, row, location_description, error): 1689 """Check that function definitions for use Pass*Ptr instead of *Ptr. 1690 1691 Args: 1692 type_text: A string containing the type. (For return values, it may contain more than the type.) 1693 row: The row number of the type. 1694 location_description: Used to indicate where the type is. This is either 'parameter' or 'return'. 1695 error: The function to call with any errors found. 1696 """ 1697 match_ref_or_own_ptr = '(?=\W|^)(Ref|Own)Ptr(?=\W)' 1698 exceptions = '(?:&|\*|\*\s*=\s*0)$' 1699 bad_type_usage = search(match_ref_or_own_ptr, type_text) 1700 exception_usage = search(exceptions, type_text) 1701 if not bad_type_usage or exception_usage: 1702 return 1703 type_name = bad_type_usage.group(0) 1704 error(row, 'readability/pass_ptr', 5, 1705 'The %s type should use Pass%s instead of %s.' % (location_description, type_name, type_name)) 1706 1707 1708 def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error): 1709 """Check that function definitions for style issues. 1710 1711 Specifically, check that parameter names in declarations add information. 1712 1713 Args: 1714 filename: Filename of the file that is being processed. 1715 file_extension: The current file extension, without the leading dot. 1716 clean_lines: A CleansedLines instance containing the file. 1717 line_number: The number of the line to check. 1718 function_state: Current function name and lines in body so far. 1719 error: The function to call with any errors found. 1720 """ 1721 if line_number != function_state.body_start_position.row: 1722 return 1723 1724 modifiers_and_return_type = function_state.modifiers_and_return_type() 1725 if filename.find('/chromium/') != -1 and search(r'\bWEBKIT_EXPORT\b', modifiers_and_return_type): 1726 if filename.find('/chromium/public/') == -1 and filename.find('/chromium/tests/') == -1 and filename.find('chromium/platform') == -1: 1727 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1728 'WEBKIT_EXPORT should only appear in the chromium public (or tests) directory.') 1729 elif not file_extension == "h": 1730 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1731 'WEBKIT_EXPORT should only be used in header files.') 1732 elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type): 1733 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1734 'WEBKIT_EXPORT should not be used on a function with a body.') 1735 elif function_state.is_pure: 1736 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1737 'WEBKIT_EXPORT should not be used with a pure virtual function.') 1738 1739 check_function_definition_and_pass_ptr(modifiers_and_return_type, function_state.function_name_start_position.row, 'return', error) 1740 1741 parameter_list = function_state.parameter_list() 1742 for parameter in parameter_list: 1743 check_function_definition_and_pass_ptr(parameter.type, parameter.row, 'parameter', error) 1744 1745 # Do checks specific to function declarations and parameter names. 1746 if not function_state.is_declaration or not parameter.name: 1747 continue 1748 1749 # Check the parameter name against the function name for single parameter set functions. 1750 if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function): 1751 trimmed_function_name = function_state.current_function[len('set'):] 1752 if not _check_parameter_name_against_text(parameter, trimmed_function_name, error): 1753 continue # Since an error was noted for this name, move to the next parameter. 1754 1755 # Check the parameter name against the type. 1756 if not _check_parameter_name_against_text(parameter, parameter.type, error): 1757 continue # Since an error was noted for this name, move to the next parameter. 1758 1759 1760 def check_pass_ptr_usage(clean_lines, line_number, function_state, error): 1761 """Check for proper usage of Pass*Ptr. 1762 1763 Currently this is limited to detecting declarations of Pass*Ptr 1764 variables inside of functions. 1765 1766 Args: 1767 clean_lines: A CleansedLines instance containing the file. 1768 line_number: The number of the line to check. 1769 function_state: Current function name and lines in body so far. 1770 error: The function to call with any errors found. 1771 """ 1772 if not function_state.in_a_function: 1773 return 1774 1775 lines = clean_lines.lines 1776 line = lines[line_number] 1777 if line_number > function_state.body_start_position.row: 1778 matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line) 1779 if matched_pass_ptr: 1780 type_name = 'Pass%sPtr' % matched_pass_ptr.group(1) 1781 error(line_number, 'readability/pass_ptr', 5, 1782 'Local variables should never be %s (see ' 1783 'http://webkit.org/coding/RefPtr.html).' % type_name) 1784 1785 1786 def check_for_leaky_patterns(clean_lines, line_number, function_state, error): 1787 """Check for constructs known to be leak prone. 1788 Args: 1789 clean_lines: A CleansedLines instance containing the file. 1790 line_number: The number of the line to check. 1791 function_state: Current function name and lines in body so far. 1792 error: The function to call with any errors found. 1793 """ 1794 lines = clean_lines.lines 1795 line = lines[line_number] 1796 1797 matched_get_dc = search(r'\b(?P<function_name>GetDC(Ex)?)\s*\(', line) 1798 if matched_get_dc: 1799 error(line_number, 'runtime/leaky_pattern', 5, 1800 'Use the class HWndDC instead of calling %s to avoid potential ' 1801 'memory leaks.' % matched_get_dc.group('function_name')) 1802 1803 matched_create_dc = search(r'\b(?P<function_name>Create(Compatible)?DC)\s*\(', line) 1804 matched_own_dc = search(r'\badoptPtr\b', line) 1805 if matched_create_dc and not matched_own_dc: 1806 error(line_number, 'runtime/leaky_pattern', 5, 1807 'Use adoptPtr and OwnPtr<HDC> when calling %s to avoid potential ' 1808 'memory leaks.' % matched_create_dc.group('function_name')) 1809 1810 1811 def check_spacing(file_extension, clean_lines, line_number, error): 1812 """Checks for the correctness of various spacing issues in the code. 1813 1814 Things we check for: spaces around operators, spaces after 1815 if/for/while/switch, no spaces around parens in function calls, two 1816 spaces between code and comment, don't start a block with a blank 1817 line, don't end a function with a blank line, don't have too many 1818 blank lines in a row. 1819 1820 Args: 1821 file_extension: The current file extension, without the leading dot. 1822 clean_lines: A CleansedLines instance containing the file. 1823 line_number: The number of the line to check. 1824 error: The function to call with any errors found. 1825 """ 1826 1827 raw = clean_lines.raw_lines 1828 line = raw[line_number] 1829 1830 # Before nixing comments, check if the line is blank for no good 1831 # reason. This includes the first line after a block is opened, and 1832 # blank lines at the end of a function (ie, right before a line like '}'). 1833 if is_blank_line(line): 1834 elided = clean_lines.elided 1835 previous_line = elided[line_number - 1] 1836 previous_brace = previous_line.rfind('{') 1837 # FIXME: Don't complain if line before blank line, and line after, 1838 # both start with alnums and are indented the same amount. 1839 # This ignores whitespace at the start of a namespace block 1840 # because those are not usually indented. 1841 if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1 1842 and previous_line[:previous_brace].find('namespace') == -1): 1843 # OK, we have a blank line at the start of a code block. Before we 1844 # complain, we check if it is an exception to the rule: The previous 1845 # non-empty line has the parameters of a function header that are indented 1846 # 4 spaces (because they did not fit in a 80 column line when placed on 1847 # the same line as the function name). We also check for the case where 1848 # the previous line is indented 6 spaces, which may happen when the 1849 # initializers of a constructor do not fit into a 80 column line. 1850 exception = False 1851 if match(r' {6}\w', previous_line): # Initializer list? 1852 # We are looking for the opening column of initializer list, which 1853 # should be indented 4 spaces to cause 6 space indentation afterwards. 1854 search_position = line_number - 2 1855 while (search_position >= 0 1856 and match(r' {6}\w', elided[search_position])): 1857 search_position -= 1 1858 exception = (search_position >= 0 1859 and elided[search_position][:5] == ' :') 1860 else: 1861 # Search for the function arguments or an initializer list. We use a 1862 # simple heuristic here: If the line is indented 4 spaces; and we have a 1863 # closing paren, without the opening paren, followed by an opening brace 1864 # or colon (for initializer lists) we assume that it is the last line of 1865 # a function header. If we have a colon indented 4 spaces, it is an 1866 # initializer list. 1867 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', 1868 previous_line) 1869 or match(r' {4}:', previous_line)) 1870 1871 if not exception: 1872 error(line_number, 'whitespace/blank_line', 2, 1873 'Blank line at the start of a code block. Is this needed?') 1874 # This doesn't ignore whitespace at the end of a namespace block 1875 # because that is too hard without pairing open/close braces; 1876 # however, a special exception is made for namespace closing 1877 # brackets which have a comment containing "namespace". 1878 # 1879 # Also, ignore blank lines at the end of a block in a long if-else 1880 # chain, like this: 1881 # if (condition1) { 1882 # // Something followed by a blank line 1883 # 1884 # } else if (condition2) { 1885 # // Something else 1886 # } 1887 if line_number + 1 < clean_lines.num_lines(): 1888 next_line = raw[line_number + 1] 1889 if (next_line 1890 and match(r'\s*}', next_line) 1891 and next_line.find('namespace') == -1 1892 and next_line.find('} else ') == -1): 1893 error(line_number, 'whitespace/blank_line', 3, 1894 'Blank line at the end of a code block. Is this needed?') 1895 1896 # Next, we check for proper spacing with respect to comments. 1897 comment_position = line.find('//') 1898 if comment_position != -1: 1899 # Check if the // may be in quotes. If so, ignore it 1900 # Comparisons made explicit for clarity 1901 if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0: # not in quotes 1902 # Allow one space before end of line comment. 1903 if (not match(r'^\s*$', line[:comment_position]) 1904 and (comment_position >= 1 1905 and ((line[comment_position - 1] not in string.whitespace) 1906 or (comment_position >= 2 1907 and line[comment_position - 2] in string.whitespace)))): 1908 error(line_number, 'whitespace/comments', 5, 1909 'One space before end of line comments') 1910 # There should always be a space between the // and the comment 1911 commentend = comment_position + 2 1912 if commentend < len(line) and not line[commentend] == ' ': 1913 # but some lines are exceptions -- e.g. if they're big 1914 # comment delimiters like: 1915 # //---------------------------------------------------------- 1916 # or they begin with multiple slashes followed by a space: 1917 # //////// Header comment 1918 matched = (search(r'[=/-]{4,}\s*$', line[commentend:]) 1919 or search(r'^/+ ', line[commentend:])) 1920 if not matched: 1921 error(line_number, 'whitespace/comments', 4, 1922 'Should have a space between // and comment') 1923 1924 # There should only be one space after punctuation in a comment. 1925 if search(r'[.!?,;:]\s\s+\w', line[comment_position:]): 1926 error(line_number, 'whitespace/comments', 5, 1927 'Should have only a single space after a punctuation in a comment.') 1928 1929 line = clean_lines.elided[line_number] # get rid of comments and strings 1930 1931 # Don't try to do spacing checks for operator methods 1932 line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=|/)\(', 'operator\(', line) 1933 # Don't try to do spacing checks for #include or #import statements at 1934 # minimum because it messes up checks for spacing around / 1935 if match(r'\s*#\s*(?:include|import)', line): 1936 return 1937 if search(r'[\w.]=[\w.]', line): 1938 error(line_number, 'whitespace/operators', 4, 1939 'Missing spaces around =') 1940 1941 # FIXME: It's not ok to have spaces around binary operators like . 1942 1943 # You should always have whitespace around binary operators. 1944 # Alas, we can't test < or > because they're legitimately used sans spaces 1945 # (a->b, vector<int> a). The only time we can tell is a < with no >, and 1946 # only if it's not template params list spilling into the next line. 1947 matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line) 1948 if not matched: 1949 # Note that while it seems that the '<[^<]*' term in the following 1950 # regexp could be simplified to '<.*', which would indeed match 1951 # the same class of strings, the [^<] means that searching for the 1952 # regexp takes linear rather than quadratic time. 1953 if not search(r'<[^<]*,\s*$', line): # template params spill 1954 matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line) 1955 if matched: 1956 error(line_number, 'whitespace/operators', 3, 1957 'Missing spaces around %s' % matched.group(1)) 1958 1959 # There shouldn't be space around unary operators 1960 matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) 1961 if matched: 1962 error(line_number, 'whitespace/operators', 4, 1963 'Extra space for operator %s' % matched.group(1)) 1964 1965 # A pet peeve of mine: no spaces after an if, while, switch, or for 1966 matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line) 1967 if matched: 1968 error(line_number, 'whitespace/parens', 5, 1969 'Missing space before ( in %s' % matched.group(1)) 1970 1971 # For if/for/foreach/while/switch, the left and right parens should be 1972 # consistent about how many spaces are inside the parens, and 1973 # there should either be zero or one spaces inside the parens. 1974 # We don't want: "if ( foo)" or "if ( foo )". 1975 # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 1976 matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line) 1977 if matched: 1978 statement = matched.group('statement') 1979 condition, rest = up_to_unmatched_closing_paren(matched.group('remainder')) 1980 if condition is not None: 1981 condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition) 1982 if condition_match: 1983 n_leading = len(condition_match.group('leading')) 1984 n_trailing = len(condition_match.group('trailing')) 1985 if n_leading != 0: 1986 for_exception = statement == 'for' and condition.startswith(' ;') 1987 if not for_exception: 1988 error(line_number, 'whitespace/parens', 5, 1989 'Extra space after ( in %s' % statement) 1990 if n_trailing != 0: 1991 for_exception = statement == 'for' and condition.endswith('; ') 1992 if not for_exception: 1993 error(line_number, 'whitespace/parens', 5, 1994 'Extra space before ) in %s' % statement) 1995 1996 # Do not check for more than one command in macros 1997 in_preprocessor_directive = match(r'\s*#', line) 1998 if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest): 1999 error(line_number, 'whitespace/parens', 4, 2000 'More than one command on the same line in %s' % statement) 2001 2002 # You should always have a space after a comma (either as fn arg or operator) 2003 if search(r',[^\s]', line): 2004 error(line_number, 'whitespace/comma', 3, 2005 'Missing space after ,') 2006 2007 matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line) 2008 if matched: 2009 error(line_number, 'whitespace/declaration', 3, 2010 'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2'))) 2011 2012 if file_extension == 'cpp': 2013 # C++ should have the & or * beside the type not the variable name. 2014 matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line) 2015 if matched: 2016 error(line_number, 'whitespace/declaration', 3, 2017 'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip())) 2018 2019 elif file_extension == 'c': 2020 # C Pointer declaration should have the * beside the variable not the type name. 2021 matched = search(r'^\s*\w+\*\s+\w+', line) 2022 if matched: 2023 error(line_number, 'whitespace/declaration', 3, 2024 'Declaration has space between * and variable name in %s' % matched.group(0).strip()) 2025 2026 # Next we will look for issues with function calls. 2027 check_spacing_for_function_call(line, line_number, error) 2028 2029 # Except after an opening paren, you should have spaces before your braces. 2030 # And since you should never have braces at the beginning of a line, this is 2031 # an easy test. 2032 if search(r'[^ ({]{', line): 2033 error(line_number, 'whitespace/braces', 5, 2034 'Missing space before {') 2035 2036 # Make sure '} else {' has spaces. 2037 if search(r'}else', line): 2038 error(line_number, 'whitespace/braces', 5, 2039 'Missing space before else') 2040 2041 # You shouldn't have spaces before your brackets, except maybe after 2042 # 'delete []' or 'new char * []'. 2043 if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line): 2044 error(line_number, 'whitespace/braces', 5, 2045 'Extra space before [') 2046 2047 # There should always be a single space in between braces on the same line. 2048 if search(r'\{\}', line): 2049 error(line_number, 'whitespace/braces', 5, 'Missing space inside { }.') 2050 if search(r'\{\s\s+\}', line): 2051 error(line_number, 'whitespace/braces', 5, 'Too many spaces inside { }.') 2052 2053 # You shouldn't have a space before a semicolon at the end of the line. 2054 # There's a special case for "for" since the style guide allows space before 2055 # the semicolon there. 2056 if search(r':\s*;\s*$', line): 2057 error(line_number, 'whitespace/semicolon', 5, 2058 'Semicolon defining empty statement. Use { } instead.') 2059 elif search(r'^\s*;\s*$', line): 2060 error(line_number, 'whitespace/semicolon', 5, 2061 'Line contains only semicolon. If this should be an empty statement, ' 2062 'use { } instead.') 2063 elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)): 2064 error(line_number, 'whitespace/semicolon', 5, 2065 'Extra space before last semicolon. If this should be an empty ' 2066 'statement, use { } instead.') 2067 elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line) 2068 and line.count('(') == line.count(')') 2069 # Allow do {} while(); 2070 and not search(r'}\s*while', line)): 2071 error(line_number, 'whitespace/semicolon', 5, 2072 'Semicolon defining empty statement for this loop. Use { } instead.') 2073 2074 2075 def get_previous_non_blank_line(clean_lines, line_number): 2076 """Return the most recent non-blank line and its line number. 2077 2078 Args: 2079 clean_lines: A CleansedLines instance containing the file contents. 2080 line_number: The number of the line to check. 2081 2082 Returns: 2083 A tuple with two elements. The first element is the contents of the last 2084 non-blank line before the current line, or the empty string if this is the 2085 first non-blank line. The second is the line number of that line, or -1 2086 if this is the first non-blank line. 2087 """ 2088 2089 previous_line_number = line_number - 1 2090 while previous_line_number >= 0: 2091 previous_line = clean_lines.elided[previous_line_number] 2092 if not is_blank_line(previous_line): # if not a blank line... 2093 return (previous_line, previous_line_number) 2094 previous_line_number -= 1 2095 return ('', -1) 2096 2097 2098 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error): 2099 """Looks for indentation errors inside of namespaces. 2100 2101 Args: 2102 clean_lines: A CleansedLines instance containing the file. 2103 line_number: The number of the line to check. 2104 file_extension: The extension (dot not included) of the file. 2105 file_state: A _FileState instance which maintains information about 2106 the state of things in the file. 2107 error: The function to call with any errors found. 2108 """ 2109 2110 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2111 2112 namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line) 2113 if not namespace_match: 2114 return 2115 2116 current_indentation_level = len(namespace_match.group('namespace_indentation')) 2117 if current_indentation_level > 0: 2118 # Don't warn about an indented namespace if we already warned about indented code. 2119 if not file_state.did_inside_namespace_indent_warning(): 2120 error(line_number, 'whitespace/indent', 4, 2121 'namespace should never be indented.') 2122 return 2123 looking_for_semicolon = False; 2124 line_offset = 0 2125 in_preprocessor_directive = False; 2126 for current_line in clean_lines.elided[line_number + 1:]: 2127 line_offset += 1 2128 if not current_line.strip(): 2129 continue 2130 if not current_indentation_level: 2131 if not (in_preprocessor_directive or looking_for_semicolon): 2132 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning(): 2133 file_state.set_did_inside_namespace_indent_warning() 2134 error(line_number + line_offset, 'whitespace/indent', 4, 2135 'Code inside a namespace should not be indented.') 2136 if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax. 2137 in_preprocessor_directive = current_line[-1] == '\\' 2138 else: 2139 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\') 2140 else: 2141 looking_for_semicolon = False; # If we have a brace we may not need a semicolon. 2142 current_indentation_level += current_line.count('{') - current_line.count('}') 2143 if current_indentation_level < 0: 2144 break; 2145 2146 2147 def check_enum_casing(clean_lines, line_number, enum_state, error): 2148 """Looks for incorrectly named enum values. 2149 2150 Args: 2151 clean_lines: A CleansedLines instance containing the file. 2152 line_number: The number of the line to check. 2153 enum_state: A _EnumState instance which maintains enum declaration state. 2154 error: The function to call with any errors found. 2155 """ 2156 2157 enum_state.is_webidl_enum |= bool(match(r'\s*// Web(?:Kit)?IDL enum\s*$', clean_lines.raw_lines[line_number])) 2158 2159 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2160 if not enum_state.process_clean_line(line): 2161 error(line_number, 'readability/enum_casing', 4, 2162 'enum members should use InterCaps with an initial capital letter.') 2163 2164 def check_directive_indentation(clean_lines, line_number, file_state, error): 2165 """Looks for indentation of preprocessor directives. 2166 2167 Args: 2168 clean_lines: A CleansedLines instance containing the file. 2169 line_number: The number of the line to check. 2170 file_state: A _FileState instance which maintains information about 2171 the state of things in the file. 2172 error: The function to call with any errors found. 2173 """ 2174 2175 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2176 2177 indented_preprocessor_directives = match(r'\s+#', line) 2178 if not indented_preprocessor_directives: 2179 return 2180 2181 error(line_number, 'whitespace/indent', 4, 'preprocessor directives (e.g., #ifdef, #define, #import) should never be indented.') 2182 2183 2184 def get_initial_spaces_for_line(clean_line): 2185 initial_spaces = 0 2186 while initial_spaces < len(clean_line) and clean_line[initial_spaces] == ' ': 2187 initial_spaces += 1 2188 return initial_spaces 2189 2190 2191 def check_indentation_amount(clean_lines, line_number, error): 2192 line = clean_lines.elided[line_number] 2193 initial_spaces = get_initial_spaces_for_line(line) 2194 2195 if initial_spaces % 4: 2196 error(line_number, 'whitespace/indent', 3, 2197 'Weird number of spaces at line-start. Are you using a 4-space indent?') 2198 return 2199 2200 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 2201 if not previous_line.strip() or match(r'\s*\w+\s*:\s*$', previous_line) or previous_line[0] == '#': 2202 return 2203 2204 previous_line_initial_spaces = get_initial_spaces_for_line(previous_line) 2205 if initial_spaces > previous_line_initial_spaces + 4: 2206 error(line_number, 'whitespace/indent', 3, 'When wrapping a line, only indent 4 spaces.') 2207 2208 2209 def check_using_std(clean_lines, line_number, file_state, error): 2210 """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'. 2211 2212 Args: 2213 clean_lines: A CleansedLines instance containing the file. 2214 line_number: The number of the line to check. 2215 file_state: A _FileState instance which maintains information about 2216 the state of things in the file. 2217 error: The function to call with any errors found. 2218 """ 2219 2220 # This check doesn't apply to C or Objective-C implementation files. 2221 if file_state.is_c_or_objective_c(): 2222 return 2223 2224 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2225 2226 using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line) 2227 if not using_std_match: 2228 return 2229 2230 method_name = using_std_match.group('method_name') 2231 error(line_number, 'build/using_std', 4, 2232 "Use 'using namespace std;' instead of 'using std::%s;'." % method_name) 2233 2234 2235 def check_max_min_macros(clean_lines, line_number, file_state, error): 2236 """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min(). 2237 2238 Args: 2239 clean_lines: A CleansedLines instance containing the file. 2240 line_number: The number of the line to check. 2241 file_state: A _FileState instance which maintains information about 2242 the state of things in the file. 2243 error: The function to call with any errors found. 2244 """ 2245 2246 # This check doesn't apply to C or Objective-C implementation files. 2247 if file_state.is_c_or_objective_c(): 2248 return 2249 2250 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2251 2252 max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line) 2253 if not max_min_macros_search: 2254 return 2255 2256 max_min_macro = max_min_macros_search.group('max_min_macro') 2257 max_min_macro_lower = max_min_macro.lower() 2258 error(line_number, 'runtime/max_min_macros', 4, 2259 'Use std::%s() or std::%s<type>() instead of the %s() macro.' 2260 % (max_min_macro_lower, max_min_macro_lower, max_min_macro)) 2261 2262 2263 def check_ctype_functions(clean_lines, line_number, file_state, error): 2264 """Looks for use of the standard functions in ctype.h and suggest they be replaced 2265 by use of equivilent ones in <wtf/ASCIICType.h>?. 2266 2267 Args: 2268 clean_lines: A CleansedLines instance containing the file. 2269 line_number: The number of the line to check. 2270 file_state: A _FileState instance which maintains information about 2271 the state of things in the file. 2272 error: The function to call with any errors found. 2273 """ 2274 2275 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2276 2277 ctype_function_search = search(r'\b(?P<ctype_function>(isalnum|isalpha|isascii|isblank|iscntrl|isdigit|isgraph|islower|isprint|ispunct|isspace|isupper|isxdigit|toascii|tolower|toupper))\s*\(', line) 2278 if not ctype_function_search: 2279 return 2280 2281 ctype_function = ctype_function_search.group('ctype_function') 2282 error(line_number, 'runtime/ctype_function', 4, 2283 'Use equivelent function in <wtf/ASCIICType.h> instead of the %s() function.' 2284 % (ctype_function)) 2285 2286 def check_switch_indentation(clean_lines, line_number, error): 2287 """Looks for indentation errors inside of switch statements. 2288 2289 Args: 2290 clean_lines: A CleansedLines instance containing the file. 2291 line_number: The number of the line to check. 2292 error: The function to call with any errors found. 2293 """ 2294 2295 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2296 2297 switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line) 2298 if not switch_match: 2299 return 2300 2301 switch_indentation = switch_match.group('switch_indentation') 2302 inner_indentation = switch_indentation + ' ' * 4 2303 line_offset = 0 2304 encountered_nested_switch = False 2305 2306 for current_line in clean_lines.elided[line_number + 1:]: 2307 line_offset += 1 2308 2309 # Skip not only empty lines but also those with preprocessor directives. 2310 if current_line.strip() == '' or current_line.startswith('#'): 2311 continue 2312 2313 if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line): 2314 # Complexity alarm - another switch statement nested inside the one 2315 # that we're currently testing. We'll need to track the extent of 2316 # that inner switch if the upcoming label tests are still supposed 2317 # to work correctly. Let's not do that; instead, we'll finish 2318 # checking this line, and then leave it like that. Assuming the 2319 # indentation is done consistently (even if incorrectly), this will 2320 # still catch all indentation issues in practice. 2321 encountered_nested_switch = True 2322 2323 current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line); 2324 current_indentation = current_indentation_match.group('indentation') 2325 remaining_line = current_indentation_match.group('remaining_line') 2326 2327 # End the check at the end of the switch statement. 2328 if remaining_line.startswith('}') and current_indentation == switch_indentation: 2329 break 2330 # Case and default branches should not be indented. The regexp also 2331 # catches single-line cases like "default: break;" but does not trigger 2332 # on stuff like "Document::Foo();". 2333 elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line): 2334 if current_indentation != switch_indentation: 2335 error(line_number + line_offset, 'whitespace/indent', 4, 2336 'A case label should not be indented, but line up with its switch statement.') 2337 # Don't throw an error for multiple badly indented labels, 2338 # one should be enough to figure out the problem. 2339 break 2340 # We ignore goto labels at the very beginning of a line. 2341 elif match(r'\w+\s*:\s*$', remaining_line): 2342 continue 2343 # It's not a goto label, so check if it's indented at least as far as 2344 # the switch statement plus one more level of indentation. 2345 elif not current_indentation.startswith(inner_indentation): 2346 error(line_number + line_offset, 'whitespace/indent', 4, 2347 'Non-label code inside switch statements should be indented.') 2348 # Don't throw an error for multiple badly indented statements, 2349 # one should be enough to figure out the problem. 2350 break 2351 2352 if encountered_nested_switch: 2353 break 2354 2355 2356 def check_braces(clean_lines, line_number, error): 2357 """Looks for misplaced braces (e.g. at the end of line). 2358 2359 Args: 2360 clean_lines: A CleansedLines instance containing the file. 2361 line_number: The number of the line to check. 2362 error: The function to call with any errors found. 2363 """ 2364 2365 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2366 2367 if match(r'\s*{\s*$', line): 2368 # We allow an open brace to start a line in the case where someone 2369 # is using braces for function definition or in a block to 2370 # explicitly create a new scope, which is commonly used to control 2371 # the lifetime of stack-allocated variables. We don't detect this 2372 # perfectly: we just don't complain if the last non-whitespace 2373 # character on the previous non-blank line is ';', ':', '{', '}', 2374 # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'. 2375 # We also allow '#' for #endif and '=' for array initialization. 2376 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 2377 if ((not search(r'[;:}{)=]\s*$|\)\s*((const|OVERRIDE)\s*)*\s*$', previous_line) 2378 or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line)) 2379 and previous_line.find('#') < 0): 2380 error(line_number, 'whitespace/braces', 4, 2381 'This { should be at the end of the previous line') 2382 elif (search(r'\)\s*(((const|OVERRIDE)\s*)*\s*)?{\s*$', line) 2383 and line.count('(') == line.count(')') 2384 and not search(r'\b(if|for|foreach|while|switch)\b', line) 2385 and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)): 2386 error(line_number, 'whitespace/braces', 4, 2387 'Place brace on its own line for function definitions.') 2388 2389 # An else clause should be on the same line as the preceding closing brace. 2390 if match(r'\s*else\s*', line): 2391 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 2392 if match(r'\s*}\s*$', previous_line): 2393 error(line_number, 'whitespace/newline', 4, 2394 'An else should appear on the same line as the preceding }') 2395 2396 # Likewise, an else should never have the else clause on the same line 2397 if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line): 2398 error(line_number, 'whitespace/newline', 4, 2399 'Else clause should never be on same line as else (use 2 lines)') 2400 2401 # In the same way, a do/while should never be on one line 2402 if match(r'\s*do [^\s{]', line): 2403 error(line_number, 'whitespace/newline', 4, 2404 'do/while clauses should not be on a single line') 2405 2406 # Braces shouldn't be followed by a ; unless they're defining a struct 2407 # or initializing an array. 2408 # We can't tell in general, but we can for some common cases. 2409 previous_line_number = line_number 2410 while True: 2411 (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number) 2412 if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'): 2413 line = previous_line + line 2414 else: 2415 break 2416 if (search(r'{.*}\s*;', line) 2417 and line.count('{') == line.count('}') 2418 and not search(r'struct|class|enum|\s*=\s*{', line)): 2419 error(line_number, 'readability/braces', 4, 2420 "You don't need a ; after a }") 2421 2422 2423 def check_exit_statement_simplifications(clean_lines, line_number, error): 2424 """Looks for else or else-if statements that should be written as an 2425 if statement when the prior if concludes with a return, break, continue or 2426 goto statement. 2427 2428 Args: 2429 clean_lines: A CleansedLines instance containing the file. 2430 line_number: The number of the line to check. 2431 error: The function to call with any errors found. 2432 """ 2433 2434 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2435 2436 else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line) 2437 if not else_match: 2438 return 2439 2440 else_indentation = else_match.group('else_indentation') 2441 inner_indentation = else_indentation + ' ' * 4 2442 2443 previous_lines = clean_lines.elided[:line_number] 2444 previous_lines.reverse() 2445 line_offset = 0 2446 encountered_exit_statement = False 2447 2448 for current_line in previous_lines: 2449 line_offset -= 1 2450 2451 # Skip not only empty lines but also those with preprocessor directives 2452 # and goto labels. 2453 if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line): 2454 continue 2455 2456 # Skip lines with closing braces on the original indentation level. 2457 # Even though the styleguide says they should be on the same line as 2458 # the "else if" statement, we also want to check for instances where 2459 # the current code does not comply with the coding style. Thus, ignore 2460 # these lines and proceed to the line before that. 2461 if current_line == else_indentation + '}': 2462 continue 2463 2464 current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line); 2465 current_indentation = current_indentation_match.group('indentation') 2466 remaining_line = current_indentation_match.group('remaining_line') 2467 2468 # As we're going up the lines, the first real statement to encounter 2469 # has to be an exit statement (return, break, continue or goto) - 2470 # otherwise, this check doesn't apply. 2471 if not encountered_exit_statement: 2472 # We only want to find exit statements if they are on exactly 2473 # the same level of indentation as expected from the code inside 2474 # the block. If the indentation doesn't strictly match then we 2475 # might have a nested if or something, which must be ignored. 2476 if current_indentation != inner_indentation: 2477 break 2478 if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line): 2479 encountered_exit_statement = True 2480 continue 2481 break 2482 2483 # When code execution reaches this point, we've found an exit statement 2484 # as last statement of the previous block. Now we only need to make 2485 # sure that the block belongs to an "if", then we can throw an error. 2486 2487 # Skip lines with opening braces on the original indentation level, 2488 # similar to the closing braces check above. ("if (condition)\n{") 2489 if current_line == else_indentation + '{': 2490 continue 2491 2492 # Skip everything that's further indented than our "else" or "else if". 2493 if current_indentation.startswith(else_indentation) and current_indentation != else_indentation: 2494 continue 2495 2496 # So we've got a line with same (or less) indentation. Is it an "if"? 2497 # If yes: throw an error. If no: don't throw an error. 2498 # Whatever the outcome, this is the end of our loop. 2499 if match(r'if\s*\(', remaining_line): 2500 if else_match.start('else') != -1: 2501 error(line_number + line_offset, 'readability/control_flow', 4, 2502 'An else statement can be removed when the prior "if" ' 2503 'concludes with a return, break, continue or goto statement.') 2504 else: 2505 error(line_number + line_offset, 'readability/control_flow', 4, 2506 'An else if statement should be written as an if statement ' 2507 'when the prior "if" concludes with a return, break, ' 2508 'continue or goto statement.') 2509 break 2510 2511 2512 def replaceable_check(operator, macro, line): 2513 """Determine whether a basic CHECK can be replaced with a more specific one. 2514 2515 For example suggest using CHECK_EQ instead of CHECK(a == b) and 2516 similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE. 2517 2518 Args: 2519 operator: The C++ operator used in the CHECK. 2520 macro: The CHECK or EXPECT macro being called. 2521 line: The current source line. 2522 2523 Returns: 2524 True if the CHECK can be replaced with a more specific one. 2525 """ 2526 2527 # This matches decimal and hex integers, strings, and chars (in that order). 2528 match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')' 2529 2530 # Expression to match two sides of the operator with something that 2531 # looks like a literal, since CHECK(x == iterator) won't compile. 2532 # This means we can't catch all the cases where a more specific 2533 # CHECK is possible, but it's less annoying than dealing with 2534 # extraneous warnings. 2535 match_this = (r'\s*' + macro + r'\((\s*' + 2536 match_constant + r'\s*' + operator + r'[^<>].*|' 2537 r'.*[^<>]' + operator + r'\s*' + match_constant + 2538 r'\s*\))') 2539 2540 # Don't complain about CHECK(x == NULL) or similar because 2541 # CHECK_EQ(x, NULL) won't compile (requires a cast). 2542 # Also, don't complain about more complex boolean expressions 2543 # involving && or || such as CHECK(a == b || c == d). 2544 return match(match_this, line) and not search(r'NULL|&&|\|\|', line) 2545 2546 2547 def check_check(clean_lines, line_number, error): 2548 """Checks the use of CHECK and EXPECT macros. 2549 2550 Args: 2551 clean_lines: A CleansedLines instance containing the file. 2552 line_number: The number of the line to check. 2553 error: The function to call with any errors found. 2554 """ 2555 2556 # Decide the set of replacement macros that should be suggested 2557 raw_lines = clean_lines.raw_lines 2558 current_macro = '' 2559 for macro in _CHECK_MACROS: 2560 if raw_lines[line_number].find(macro) >= 0: 2561 current_macro = macro 2562 break 2563 if not current_macro: 2564 # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT' 2565 return 2566 2567 line = clean_lines.elided[line_number] # get rid of comments and strings 2568 2569 # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc. 2570 for operator in ['==', '!=', '>=', '>', '<=', '<']: 2571 if replaceable_check(operator, current_macro, line): 2572 error(line_number, 'readability/check', 2, 2573 'Consider using %s instead of %s(a %s b)' % ( 2574 _CHECK_REPLACEMENT[current_macro][operator], 2575 current_macro, operator)) 2576 break 2577 2578 2579 def check_for_comparisons_to_zero(clean_lines, line_number, error): 2580 # Get the line without comments and strings. 2581 line = clean_lines.elided[line_number] 2582 2583 # Include NULL here so that users don't have to convert NULL to 0 first and then get this error. 2584 if search(r'[=!]=\s*(NULL|0|true|false)[^\w.]', line) or search(r'[^\w.](NULL|0|true|false)\s*[=!]=', line): 2585 if not search('LIKELY', line) and not search('UNLIKELY', line): 2586 error(line_number, 'readability/comparison_to_zero', 5, 2587 'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.') 2588 2589 2590 def check_for_null(clean_lines, line_number, file_state, error): 2591 # This check doesn't apply to C or Objective-C implementation files. 2592 if file_state.is_c_or_objective_c(): 2593 return 2594 2595 line = clean_lines.elided[line_number] 2596 2597 # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372. 2598 if search(r'\bg(_[a-z]+)+\b', line): 2599 return 2600 2601 # Don't warn about NULL usage in gst_*(). See Bug 70498. 2602 if search(r'\bgst(_[a-z]+)+\b', line): 2603 return 2604 2605 # Don't warn about NULL usage in gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090. 2606 if search(r'\bgdk_pixbuf_save_to\w+\b', line): 2607 return 2608 2609 # Don't warn about NULL usage in gtk_widget_style_get(), gtk_style_context_get_style(), or gtk_style_context_get(). See Bug 51758 2610 if search(r'\bgtk_widget_style_get\(\w+\b', line) or search(r'\bgtk_style_context_get_style\(\w+\b', line) or search(r'\bgtk_style_context_get\(\w+\b', line): 2611 return 2612 2613 # Don't warn about NULL usage in soup_server_new(). See Bug 77890. 2614 if search(r'\bsoup_server_new\(\w+\b', line): 2615 return 2616 2617 if search(r'\bNULL\b', line): 2618 error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.') 2619 return 2620 2621 line = clean_lines.raw_lines[line_number] 2622 # See if NULL occurs in any comments in the line. If the search for NULL using the raw line 2623 # matches, then do the check with strings collapsed to avoid giving errors for 2624 # NULLs occurring in strings. 2625 if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)): 2626 error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).') 2627 2628 def get_line_width(line): 2629 """Determines the width of the line in column positions. 2630 2631 Args: 2632 line: A string, which may be a Unicode string. 2633 2634 Returns: 2635 The width of the line in column positions, accounting for Unicode 2636 combining characters and wide characters. 2637 """ 2638 if isinstance(line, unicode): 2639 width = 0 2640 for c in unicodedata.normalize('NFC', line): 2641 if unicodedata.east_asian_width(c) in ('W', 'F'): 2642 width += 2 2643 elif not unicodedata.combining(c): 2644 width += 1 2645 return width 2646 return len(line) 2647 2648 2649 def check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line_number, error): 2650 """Scans the bodies of conditionals and loops, and in particular 2651 all the arms of conditionals, for violations in the use of braces. 2652 2653 Specifically: 2654 2655 (1) If an arm omits braces, then the following statement must be on one 2656 physical line. 2657 (2) If any arm uses braces, all arms must use them. 2658 2659 These checks are only done here if we find the start of an 2660 'if/for/foreach/while' statement, because this function fails fast 2661 if it encounters constructs it doesn't understand. Checks 2662 elsewhere validate other constraints, such as requiring '}' and 2663 'else' to be on the same line. 2664 2665 Args: 2666 clean_lines: A CleansedLines instance containing the file. 2667 line_number: The number of the line to check. 2668 error: The function to call with any errors found. 2669 """ 2670 2671 # We work with the elided lines. Comments have been removed, but line 2672 # numbers are preserved, so we can still find situations where 2673 # single-expression control clauses span multiple lines, or when a 2674 # comment preceded the expression. 2675 lines = clean_lines.elided 2676 line = lines[line_number] 2677 2678 # Match control structures. 2679 control_match = match(r'\s*(if|foreach|for|while)\s*\(', line) 2680 if not control_match: 2681 return 2682 2683 # Found the start of a conditional or loop. 2684 2685 # The following loop handles all potential arms of the control clause. 2686 # The initial conditions are the following: 2687 # - We start on the opening paren '(' of the condition, *unless* we are 2688 # handling an 'else' block, in which case there is no condition. 2689 # - In the latter case, we start at the position just beyond the 'else' 2690 # token. 2691 expect_conditional_expression = True 2692 know_whether_using_braces = False 2693 using_braces = False 2694 search_for_else_clause = control_match.group(1) == "if" 2695 current_pos = Position(line_number, control_match.end() - 1) 2696 2697 while True: 2698 if expect_conditional_expression: 2699 # Try to find the end of the conditional expression, 2700 # potentially spanning multiple lines. 2701 open_paren_pos = current_pos 2702 close_paren_pos = close_expression(lines, open_paren_pos) 2703 if close_paren_pos.column < 0: 2704 return 2705 current_pos = close_paren_pos 2706 2707 end_line_of_conditional = current_pos.row 2708 2709 # Find the start of the body. 2710 current_pos = _find_in_lines(r'\S', lines, current_pos, None) 2711 if not current_pos: 2712 return 2713 2714 current_arm_uses_brace = False 2715 if lines[current_pos.row][current_pos.column] == '{': 2716 current_arm_uses_brace = True 2717 if know_whether_using_braces: 2718 if using_braces != current_arm_uses_brace: 2719 error(current_pos.row, 'whitespace/braces', 4, 2720 'If one part of an if-else statement uses curly braces, the other part must too.') 2721 return 2722 know_whether_using_braces = True 2723 using_braces = current_arm_uses_brace 2724 2725 if using_braces: 2726 # Skip over the entire arm. 2727 current_pos = close_expression(lines, current_pos) 2728 if current_pos.column < 0: 2729 return 2730 else: 2731 # Skip over the current expression. 2732 current_line_number = current_pos.row 2733 current_pos = _find_in_lines(r';', lines, current_pos, None) 2734 if not current_pos: 2735 return 2736 # If the end of the expression is beyond the line just after 2737 # the close parenthesis or control clause, we've found a 2738 # single-expression arm that spans multiple lines. (We don't 2739 # fire this error for expressions ending on the same line; that 2740 # is a different error, handled elsewhere.) 2741 if current_pos.row > 1 + end_line_of_conditional: 2742 error(current_pos.row, 'whitespace/braces', 4, 2743 'A conditional or loop body must use braces if the statement is more than one line long.') 2744 return 2745 current_pos = Position(current_pos.row, 1 + current_pos.column) 2746 2747 # At this point current_pos points just past the end of the last 2748 # arm. If we just handled the last control clause, we're done. 2749 if not search_for_else_clause: 2750 return 2751 2752 # Scan forward for the next non-whitespace character, and see 2753 # whether we are continuing a conditional (with an 'else' or 2754 # 'else if'), or are done. 2755 current_pos = _find_in_lines(r'\S', lines, current_pos, None) 2756 if not current_pos: 2757 return 2758 next_nonspace_string = lines[current_pos.row][current_pos.column:] 2759 next_conditional = match(r'(else\s*if|else)', next_nonspace_string) 2760 if not next_conditional: 2761 # Done processing this 'if' and all arms. 2762 return 2763 if next_conditional.group(1) == "else if": 2764 current_pos = _find_in_lines(r'\(', lines, current_pos, None) 2765 else: 2766 current_pos.column += 4 # skip 'else' 2767 expect_conditional_expression = False 2768 search_for_else_clause = False 2769 # End while loop 2770 2771 def check_style(clean_lines, line_number, file_extension, class_state, file_state, enum_state, error): 2772 """Checks rules from the 'C++ style rules' section of cppguide.html. 2773 2774 Most of these rules are hard to test (naming, comment style), but we 2775 do what we can. In particular we check for 4-space indents, line lengths, 2776 tab usage, spaces inside code, etc. 2777 2778 Args: 2779 clean_lines: A CleansedLines instance containing the file. 2780 line_number: The number of the line to check. 2781 file_extension: The extension (without the dot) of the filename. 2782 class_state: A _ClassState instance which maintains information about 2783 the current stack of nested class declarations being parsed. 2784 file_state: A _FileState instance which maintains information about 2785 the state of things in the file. 2786 enum_state: A _EnumState instance which maintains the current enum state. 2787 error: The function to call with any errors found. 2788 """ 2789 2790 raw_lines = clean_lines.raw_lines 2791 line = raw_lines[line_number] 2792 2793 if line.find('\t') != -1: 2794 error(line_number, 'whitespace/tab', 1, 2795 'Tab found; better to use spaces') 2796 2797 cleansed_line = clean_lines.elided[line_number] 2798 if line and line[-1].isspace(): 2799 error(line_number, 'whitespace/end_of_line', 4, 2800 'Line ends in whitespace. Consider deleting these extra spaces.') 2801 2802 if (cleansed_line.count(';') > 1 2803 # for loops are allowed two ;'s (and may run over two lines). 2804 and cleansed_line.find('for') == -1 2805 and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1 2806 or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1) 2807 # It's ok to have many commands in a switch case that fits in 1 line 2808 and not ((cleansed_line.find('case ') != -1 2809 or cleansed_line.find('default:') != -1) 2810 and cleansed_line.find('break;') != -1) 2811 # Also it's ok to have many commands in trivial single-line accessors in class definitions. 2812 and not (match(r'.*\(.*\).*{.*.}', line) 2813 and class_state.classinfo_stack 2814 and line.count('{') == line.count('}')) 2815 and not cleansed_line.startswith('#define ') 2816 # It's ok to use use WTF_MAKE_NONCOPYABLE and WTF_MAKE_FAST_ALLOCATED macros in 1 line 2817 and not (cleansed_line.find("WTF_MAKE_NONCOPYABLE") != -1 2818 and cleansed_line.find("WTF_MAKE_FAST_ALLOCATED") != -1)): 2819 error(line_number, 'whitespace/newline', 4, 2820 'More than one command on the same line') 2821 2822 if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'): 2823 error(line_number, 'whitespace/operators', 4, 2824 'Boolean expressions that span multiple lines should have their ' 2825 'operators on the left side of the line instead of the right side.') 2826 2827 # Some more style checks 2828 check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error) 2829 check_directive_indentation(clean_lines, line_number, file_state, error) 2830 check_using_std(clean_lines, line_number, file_state, error) 2831 check_max_min_macros(clean_lines, line_number, file_state, error) 2832 check_ctype_functions(clean_lines, line_number, file_state, error) 2833 check_switch_indentation(clean_lines, line_number, error) 2834 check_braces(clean_lines, line_number, error) 2835 check_exit_statement_simplifications(clean_lines, line_number, error) 2836 check_spacing(file_extension, clean_lines, line_number, error) 2837 check_check(clean_lines, line_number, error) 2838 check_for_comparisons_to_zero(clean_lines, line_number, error) 2839 check_for_null(clean_lines, line_number, file_state, error) 2840 check_indentation_amount(clean_lines, line_number, error) 2841 check_enum_casing(clean_lines, line_number, enum_state, error) 2842 2843 2844 _RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"') 2845 _RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') 2846 # Matches the first component of a filename delimited by -s and _s. That is: 2847 # _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' 2848 # _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo' 2849 # _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo' 2850 # _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo' 2851 _RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') 2852 2853 2854 def _drop_common_suffixes(filename): 2855 """Drops common suffixes like _test.cpp or -inl.h from filename. 2856 2857 For example: 2858 >>> _drop_common_suffixes('foo/foo-inl.h') 2859 'foo/foo' 2860 >>> _drop_common_suffixes('foo/bar/foo.cpp') 2861 'foo/bar/foo' 2862 >>> _drop_common_suffixes('foo/foo_internal.h') 2863 'foo/foo' 2864 >>> _drop_common_suffixes('foo/foo_unusualinternal.h') 2865 'foo/foo_unusualinternal' 2866 2867 Args: 2868 filename: The input filename. 2869 2870 Returns: 2871 The filename with the common suffix removed. 2872 """ 2873 for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp', 2874 'inl.h', 'impl.h', 'internal.h'): 2875 if (filename.endswith(suffix) and len(filename) > len(suffix) 2876 and filename[-len(suffix) - 1] in ('-', '_')): 2877 return filename[:-len(suffix) - 1] 2878 return os.path.splitext(filename)[0] 2879 2880 2881 def _classify_include(filename, include, is_system, include_state): 2882 """Figures out what kind of header 'include' is. 2883 2884 Args: 2885 filename: The current file cpp_style is running over. 2886 include: The path to a #included file. 2887 is_system: True if the #include used <> rather than "". 2888 include_state: An _IncludeState instance in which the headers are inserted. 2889 2890 Returns: 2891 One of the _XXX_HEADER constants. 2892 2893 For example: 2894 >>> _classify_include('foo.cpp', 'config.h', False) 2895 _CONFIG_HEADER 2896 >>> _classify_include('foo.cpp', 'foo.h', False) 2897 _PRIMARY_HEADER 2898 >>> _classify_include('foo.cpp', 'bar.h', False) 2899 _OTHER_HEADER 2900 """ 2901 2902 # If it is a system header we know it is classified as _OTHER_HEADER. 2903 if is_system and not include.startswith('public/'): 2904 return _OTHER_HEADER 2905 2906 # If the include is named config.h then this is WebCore/config.h. 2907 if include == "config.h": 2908 return _CONFIG_HEADER 2909 2910 # There cannot be primary includes in header files themselves. Only an 2911 # include exactly matches the header filename will be is flagged as 2912 # primary, so that it triggers the "don't include yourself" check. 2913 if filename.endswith('.h') and filename != include: 2914 return _OTHER_HEADER; 2915 2916 # Qt's moc files do not follow the naming and ordering rules, so they should be skipped 2917 if include.startswith('moc_') and include.endswith('.cpp'): 2918 return _MOC_HEADER 2919 2920 if include.endswith('.moc'): 2921 return _MOC_HEADER 2922 2923 # If the target file basename starts with the include we're checking 2924 # then we consider it the primary header. 2925 target_base = FileInfo(filename).base_name() 2926 include_base = FileInfo(include).base_name() 2927 2928 # If we haven't encountered a primary header, then be lenient in checking. 2929 if not include_state.visited_primary_section(): 2930 if target_base.find(include_base) != -1: 2931 return _PRIMARY_HEADER 2932 # Qt private APIs use _p.h suffix. 2933 if include_base.find(target_base) != -1 and include_base.endswith('_p'): 2934 return _PRIMARY_HEADER 2935 2936 # If we already encountered a primary header, perform a strict comparison. 2937 # In case the two filename bases are the same then the above lenient check 2938 # probably was a false positive. 2939 elif include_state.visited_primary_section() and target_base == include_base: 2940 if include == "ResourceHandleWin.h": 2941 # FIXME: Thus far, we've only seen one example of these, but if we 2942 # start to see more, please consider generalizing this check 2943 # somehow. 2944 return _OTHER_HEADER 2945 return _PRIMARY_HEADER 2946 2947 return _OTHER_HEADER 2948 2949 2950 def _does_primary_header_exist(filename): 2951 """Return a primary header file name for a file, or empty string 2952 if the file is not source file or primary header does not exist. 2953 """ 2954 fileinfo = FileInfo(filename) 2955 if not fileinfo.is_source(): 2956 return False 2957 primary_header = fileinfo.no_extension() + ".h" 2958 return os.path.isfile(primary_header) 2959 2960 2961 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error): 2962 """Check rules that are applicable to #include lines. 2963 2964 Strings on #include lines are NOT removed from elided line, to make 2965 certain tasks easier. However, to prevent false positives, checks 2966 applicable to #include lines in CheckLanguage must be put here. 2967 2968 Args: 2969 filename: The name of the current file. 2970 file_extension: The current file extension, without the leading dot. 2971 clean_lines: A CleansedLines instance containing the file. 2972 line_number: The number of the line to check. 2973 include_state: An _IncludeState instance in which the headers are inserted. 2974 error: The function to call with any errors found. 2975 """ 2976 # FIXME: For readability or as a possible optimization, consider 2977 # exiting early here by checking whether the "build/include" 2978 # category should be checked for the given filename. This 2979 # may involve having the error handler classes expose a 2980 # should_check() method, in addition to the usual __call__ 2981 # method. 2982 line = clean_lines.lines[line_number] 2983 2984 matched = _RE_PATTERN_INCLUDE.search(line) 2985 if not matched: 2986 return 2987 2988 include = matched.group(2) 2989 is_system = (matched.group(1) == '<') 2990 2991 # Look for any of the stream classes that are part of standard C++. 2992 if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include): 2993 error(line_number, 'readability/streams', 3, 2994 'Streams are highly discouraged.') 2995 2996 # Look for specific includes to fix. 2997 if include.startswith('wtf/') and is_system: 2998 error(line_number, 'build/include', 4, 2999 'wtf includes should be "wtf/file.h" instead of <wtf/file.h>.') 3000 3001 if filename.find('/chromium/') != -1 and include.startswith('cc/CC'): 3002 error(line_number, 'build/include', 4, 3003 'cc includes should be "CCFoo.h" instead of "cc/CCFoo.h".') 3004 3005 duplicate_header = include in include_state 3006 if duplicate_header: 3007 error(line_number, 'build/include', 4, 3008 '"%s" already included at %s:%s' % 3009 (include, filename, include_state[include])) 3010 else: 3011 include_state[include] = line_number 3012 3013 header_type = _classify_include(filename, include, is_system, include_state) 3014 primary_header_exists = _does_primary_header_exist(filename) 3015 include_state.header_types[line_number] = header_type 3016 3017 # Only proceed if this isn't a duplicate header. 3018 if duplicate_header: 3019 return 3020 3021 # We want to ensure that headers appear in the right order: 3022 # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted 3023 # 2) for header files: alphabetically sorted 3024 # The include_state object keeps track of the last type seen 3025 # and complains if the header types are out of order or missing. 3026 error_message = include_state.check_next_include_order(header_type, 3027 file_extension == "h", 3028 primary_header_exists) 3029 3030 # Check to make sure we have a blank line after primary header. 3031 if not error_message and header_type == _PRIMARY_HEADER: 3032 next_line = clean_lines.raw_lines[line_number + 1] 3033 if not is_blank_line(next_line): 3034 error(line_number, 'build/include_order', 4, 3035 'You should add a blank line after implementation file\'s own header.') 3036 3037 # Check to make sure all headers besides config.h and the primary header are 3038 # alphabetically sorted. Skip Qt's moc files. 3039 if not error_message and header_type == _OTHER_HEADER: 3040 previous_line_number = line_number - 1; 3041 previous_line = clean_lines.lines[previous_line_number] 3042 previous_match = _RE_PATTERN_INCLUDE.search(previous_line) 3043 while (not previous_match and previous_line_number > 0 3044 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)): 3045 previous_line_number -= 1; 3046 previous_line = clean_lines.lines[previous_line_number] 3047 previous_match = _RE_PATTERN_INCLUDE.search(previous_line) 3048 if previous_match: 3049 previous_header_type = include_state.header_types[previous_line_number] 3050 if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip(): 3051 # This type of error is potentially a problem with this line or the previous one, 3052 # so if the error is filtered for one line, report it for the next. This is so that 3053 # we properly handle patches, for which only modified lines produce errors. 3054 if not error(line_number - 1, 'build/include_order', 4, 'Alphabetical sorting problem.'): 3055 error(line_number, 'build/include_order', 4, 'Alphabetical sorting problem.') 3056 3057 if error_message: 3058 if file_extension == 'h': 3059 error(line_number, 'build/include_order', 4, 3060 '%s Should be: alphabetically sorted.' % 3061 error_message) 3062 else: 3063 error(line_number, 'build/include_order', 4, 3064 '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' % 3065 error_message) 3066 3067 3068 def check_language(filename, clean_lines, line_number, file_extension, include_state, 3069 file_state, error): 3070 """Checks rules from the 'C++ language rules' section of cppguide.html. 3071 3072 Some of these rules are hard to test (function overloading, using 3073 uint32 inappropriately), but we do the best we can. 3074 3075 Args: 3076 filename: The name of the current file. 3077 clean_lines: A CleansedLines instance containing the file. 3078 line_number: The number of the line to check. 3079 file_extension: The extension (without the dot) of the filename. 3080 include_state: An _IncludeState instance in which the headers are inserted. 3081 file_state: A _FileState instance which maintains information about 3082 the state of things in the file. 3083 error: The function to call with any errors found. 3084 """ 3085 # If the line is empty or consists of entirely a comment, no need to 3086 # check it. 3087 line = clean_lines.elided[line_number] 3088 if not line: 3089 return 3090 3091 matched = _RE_PATTERN_INCLUDE.search(line) 3092 if matched: 3093 check_include_line(filename, file_extension, clean_lines, line_number, include_state, error) 3094 return 3095 3096 # FIXME: figure out if they're using default arguments in fn proto. 3097 3098 # Check to see if they're using an conversion function cast. 3099 # I just try to capture the most common basic types, though there are more. 3100 # Parameterless conversion functions, such as bool(), are allowed as they are 3101 # probably a member operator declaration or default constructor. 3102 matched = search( 3103 r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line) 3104 if matched: 3105 # gMock methods are defined using some variant of MOCK_METHODx(name, type) 3106 # where type may be float(), int(string), etc. Without context they are 3107 # virtually indistinguishable from int(x) casts. 3108 if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line): 3109 error(line_number, 'readability/casting', 4, 3110 'Using deprecated casting style. ' 3111 'Use static_cast<%s>(...) instead' % 3112 matched.group(1)) 3113 3114 check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number], 3115 'static_cast', 3116 r'\((int|float|double|bool|char|u?int(16|32|64))\)', 3117 error) 3118 # This doesn't catch all cases. Consider (const char * const)"hello". 3119 check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number], 3120 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error) 3121 3122 # In addition, we look for people taking the address of a cast. This 3123 # is dangerous -- casts can assign to temporaries, so the pointer doesn't 3124 # point where you think. 3125 if search( 3126 r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line): 3127 error(line_number, 'runtime/casting', 4, 3128 ('Are you taking an address of a cast? ' 3129 'This is dangerous: could be a temp var. ' 3130 'Take the address before doing the cast, rather than after')) 3131 3132 # Check for people declaring static/global STL strings at the top level. 3133 # This is dangerous because the C++ language does not guarantee that 3134 # globals with constructors are initialized before the first access. 3135 matched = match( 3136 r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)', 3137 line) 3138 # Make sure it's not a function. 3139 # Function template specialization looks like: "string foo<Type>(...". 3140 # Class template definitions look like: "string Foo<Type>::Method(...". 3141 if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', 3142 matched.group(3)): 3143 error(line_number, 'runtime/string', 4, 3144 'For a static/global string constant, use a C style string instead: ' 3145 '"%schar %s[]".' % 3146 (matched.group(1), matched.group(2))) 3147 3148 # Check that we're not using RTTI outside of testing code. 3149 if search(r'\bdynamic_cast<', line): 3150 error(line_number, 'runtime/rtti', 5, 3151 'Do not use dynamic_cast<>. If you need to cast within a class ' 3152 "hierarchy, use static_cast<> to upcast. Google doesn't support " 3153 'RTTI.') 3154 3155 if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line): 3156 error(line_number, 'runtime/init', 4, 3157 'You seem to be initializing a member variable with itself.') 3158 3159 if file_extension == 'h': 3160 # FIXME: check that 1-arg constructors are explicit. 3161 # How to tell it's a constructor? 3162 # (handled in check_for_non_standard_constructs for now) 3163 pass 3164 3165 # Check if people are using the verboten C basic types. The only exception 3166 # we regularly allow is "unsigned short port" for port. 3167 if search(r'\bshort port\b', line): 3168 if not search(r'\bunsigned short port\b', line): 3169 error(line_number, 'runtime/int', 4, 3170 'Use "unsigned short" for ports, not "short"') 3171 3172 # When snprintf is used, the second argument shouldn't be a literal. 3173 matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) 3174 if matched: 3175 error(line_number, 'runtime/printf', 3, 3176 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' 3177 'to snprintf.' % (matched.group(1), matched.group(2))) 3178 3179 # Check if some verboten C functions are being used. 3180 if search(r'\bsprintf\b', line): 3181 error(line_number, 'runtime/printf', 5, 3182 'Never use sprintf. Use snprintf instead.') 3183 matched = search(r'\b(strcpy|strcat)\b', line) 3184 if matched: 3185 error(line_number, 'runtime/printf', 4, 3186 'Almost always, snprintf is better than %s' % matched.group(1)) 3187 3188 if search(r'\bsscanf\b', line): 3189 error(line_number, 'runtime/printf', 1, 3190 'sscanf can be ok, but is slow and can overflow buffers.') 3191 3192 # Check for suspicious usage of "if" like 3193 # } if (a == b) { 3194 if search(r'\}\s*if\s*\(', line): 3195 error(line_number, 'readability/braces', 4, 3196 'Did you mean "else if"? If not, start a new line for "if".') 3197 3198 # Check for potential format string bugs like printf(foo). 3199 # We constrain the pattern not to pick things like DocidForPrintf(foo). 3200 # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) 3201 matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I) 3202 if matched: 3203 error(line_number, 'runtime/printf', 4, 3204 'Potential format string bug. Do %s("%%s", %s) instead.' 3205 % (matched.group(1), matched.group(2))) 3206 3207 # Check for potential memset bugs like memset(buf, sizeof(buf), 0). 3208 matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) 3209 if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)): 3210 error(line_number, 'runtime/memset', 4, 3211 'Did you mean "memset(%s, 0, %s)"?' 3212 % (matched.group(1), matched.group(2))) 3213 3214 # Detect variable-length arrays. 3215 matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) 3216 if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and 3217 matched.group(3).find(']') == -1): 3218 # Split the size using space and arithmetic operators as delimiters. 3219 # If any of the resulting tokens are not compile time constants then 3220 # report the error. 3221 tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3)) 3222 is_const = True 3223 skip_next = False 3224 for tok in tokens: 3225 if skip_next: 3226 skip_next = False 3227 continue 3228 3229 if search(r'sizeof\(.+\)', tok): 3230 continue 3231 if search(r'arraysize\(\w+\)', tok): 3232 continue 3233 3234 tok = tok.lstrip('(') 3235 tok = tok.rstrip(')') 3236 if not tok: 3237 continue 3238 if match(r'\d+', tok): 3239 continue 3240 if match(r'0[xX][0-9a-fA-F]+', tok): 3241 continue 3242 if match(r'k[A-Z0-9]\w*', tok): 3243 continue 3244 if match(r'(.+::)?k[A-Z0-9]\w*', tok): 3245 continue 3246 if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): 3247 continue 3248 # A catch all for tricky sizeof cases, including 'sizeof expression', 3249 # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' 3250 # requires skipping the next token becasue we split on ' ' and '*'. 3251 if tok.startswith('sizeof'): 3252 skip_next = True 3253 continue 3254 is_const = False 3255 break 3256 if not is_const: 3257 error(line_number, 'runtime/arrays', 1, 3258 'Do not use variable-length arrays. Use an appropriately named ' 3259 "('k' followed by CamelCase) compile-time constant for the size.") 3260 3261 # Check for use of unnamed namespaces in header files. Registration 3262 # macros are typically OK, so we allow use of "namespace {" on lines 3263 # that end with backslashes. 3264 if (file_extension == 'h' 3265 and search(r'\bnamespace\s*{', line) 3266 and line[-1] != '\\'): 3267 error(line_number, 'build/namespaces', 4, 3268 'Do not use unnamed namespaces in header files. See ' 3269 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' 3270 ' for more information.') 3271 3272 # Check for plain bitfields declared without either "singed" or "unsigned". 3273 # Most compilers treat such bitfields as signed, but there are still compilers like 3274 # RVCT 4.0 that use unsigned by default. 3275 matched = re.match(r'\s*((const|mutable)\s+)?(char|(short(\s+int)?)|int|long(\s+(long|int))?)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:\s*\d+\s*;', line) 3276 if matched: 3277 error(line_number, 'runtime/bitfields', 5, 3278 'Please declare integral type bitfields with either signed or unsigned.') 3279 3280 check_identifier_name_in_declaration(filename, line_number, line, file_state, error) 3281 3282 # Check for unsigned int (should be just 'unsigned') 3283 if search(r'\bunsigned int\b', line): 3284 error(line_number, 'runtime/unsigned', 1, 3285 'Omit int when using unsigned') 3286 3287 # Check that we're not using static_cast<Text*>. 3288 if search(r'\bstatic_cast<Text\*>', line): 3289 error(line_number, 'readability/check', 4, 3290 'Consider using toText helper function in WebCore/dom/Text.h ' 3291 'instead of static_cast<Text*>') 3292 3293 def check_identifier_name_in_declaration(filename, line_number, line, file_state, error): 3294 """Checks if identifier names contain any underscores. 3295 3296 As identifiers in libraries we are using have a bunch of 3297 underscores, we only warn about the declarations of identifiers 3298 and don't check use of identifiers. 3299 3300 Args: 3301 filename: The name of the current file. 3302 line_number: The number of the line to check. 3303 line: The line of code to check. 3304 file_state: A _FileState instance which maintains information about 3305 the state of things in the file. 3306 error: The function to call with any errors found. 3307 """ 3308 # We don't check a return statement. 3309 if match(r'\s*(return|delete)\b', line): 3310 return 3311 3312 # Basically, a declaration is a type name followed by whitespaces 3313 # followed by an identifier. The type name can be complicated 3314 # due to type adjectives and templates. We remove them first to 3315 # simplify the process to find declarations of identifiers. 3316 3317 # Convert "long long", "long double", and "long long int" to 3318 # simple types, but don't remove simple "long". 3319 line = sub(r'long (long )?(?=long|double|int)', '', line) 3320 # Convert unsigned/signed types to simple types, too. 3321 line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line) 3322 line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line) 3323 3324 # Remove "new" and "new (expr)" to simplify, too. 3325 line = sub(r'new\s*(\([^)]*\))?', '', line) 3326 3327 # Remove all template parameters by removing matching < and >. 3328 # Loop until no templates are removed to remove nested templates. 3329 while True: 3330 line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line) 3331 if not number_of_replacements: 3332 break 3333 3334 # Declarations of local variables can be in condition expressions 3335 # of control flow statements (e.g., "if (RenderObject* p = o->parent())"). 3336 # We remove the keywords and the first parenthesis. 3337 # 3338 # Declarations in "while", "if", and "switch" are different from 3339 # other declarations in two aspects: 3340 # 3341 # - There can be only one declaration between the parentheses. 3342 # (i.e., you cannot write "if (int i = 0, j = 1) {}") 3343 # - The variable must be initialized. 3344 # (i.e., you cannot write "if (int i) {}") 3345 # 3346 # and we will need different treatments for them. 3347 line = sub(r'^\s*for\s*\(', '', line) 3348 line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line) 3349 3350 # Detect variable and functions. 3351 type_regexp = r'\w([\w]|\s*[*&]\s*|::)+' 3352 identifier_regexp = r'(?P<identifier>[\w:]+)' 3353 maybe_bitfield_regexp = r'(:\s*\d+\s*)?' 3354 character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)' 3355 declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + maybe_bitfield_regexp + character_after_identifier_regexp 3356 declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp 3357 is_function_arguments = False 3358 number_of_identifiers = 0 3359 while True: 3360 # If we are seeing the first identifier or arguments of a 3361 # function, there should be a type name before an identifier. 3362 if not number_of_identifiers or is_function_arguments: 3363 declaration_regexp = declaration_with_type_regexp 3364 else: 3365 declaration_regexp = declaration_without_type_regexp 3366 3367 matched = match(declaration_regexp, line) 3368 if not matched: 3369 return 3370 identifier = matched.group('identifier') 3371 character_after_identifier = matched.group('character_after_identifier') 3372 3373 # If we removed a non-for-control statement, the character after 3374 # the identifier should be '='. With this rule, we can avoid 3375 # warning for cases like "if (val & INT_MAX) {". 3376 if control_statement and character_after_identifier != '=': 3377 return 3378 3379 is_function_arguments = is_function_arguments or character_after_identifier == '(' 3380 3381 # Remove "m_" and "s_" to allow them. 3382 modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier) 3383 if not file_state.is_objective_c() and modified_identifier.find('_') >= 0: 3384 # Various exceptions to the rule: JavaScript op codes functions, const_iterator. 3385 if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0) 3386 and not (filename.find('gtk') >= 0 and modified_identifier.startswith('webkit_') >= 0) 3387 and not modified_identifier.startswith('tst_') 3388 and not modified_identifier.startswith('webkit_dom_object_') 3389 and not modified_identifier.startswith('webkit_soup') 3390 and not modified_identifier.startswith('NPN_') 3391 and not modified_identifier.startswith('NPP_') 3392 and not modified_identifier.startswith('NP_') 3393 and not modified_identifier.startswith('qt_') 3394 and not modified_identifier.startswith('_q_') 3395 and not modified_identifier.startswith('cairo_') 3396 and not modified_identifier.startswith('Ecore_') 3397 and not modified_identifier.startswith('Eina_') 3398 and not modified_identifier.startswith('Evas_') 3399 and not modified_identifier.startswith('Ewk_') 3400 and not modified_identifier.startswith('cti_') 3401 and not modified_identifier.find('::qt_') >= 0 3402 and not modified_identifier.find('::_q_') >= 0 3403 and not modified_identifier == "const_iterator" 3404 and not modified_identifier == "vm_throw" 3405 and not modified_identifier == "DFG_OPERATION"): 3406 error(line_number, 'readability/naming/underscores', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.") 3407 3408 # Check for variables named 'l', these are too easy to confuse with '1' in some fonts 3409 if modified_identifier == 'l': 3410 error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.") 3411 3412 # There can be only one declaration in non-for-control statements. 3413 if control_statement: 3414 return 3415 # We should continue checking if this is a function 3416 # declaration because we need to check its arguments. 3417 # Also, we need to check multiple declarations. 3418 if character_after_identifier != '(' and character_after_identifier != ',': 3419 return 3420 3421 number_of_identifiers += 1 3422 line = line[matched.end():] 3423 3424 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern, 3425 error): 3426 """Checks for a C-style cast by looking for the pattern. 3427 3428 This also handles sizeof(type) warnings, due to similarity of content. 3429 3430 Args: 3431 line_number: The number of the line to check. 3432 line: The line of code to check. 3433 raw_line: The raw line of code to check, with comments. 3434 cast_type: The string for the C++ cast to recommend. This is either 3435 reinterpret_cast or static_cast, depending. 3436 pattern: The regular expression used to find C-style casts. 3437 error: The function to call with any errors found. 3438 """ 3439 matched = search(pattern, line) 3440 if not matched: 3441 return 3442 3443 # e.g., sizeof(int) 3444 sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1]) 3445 if sizeof_match: 3446 error(line_number, 'runtime/sizeof', 1, 3447 'Using sizeof(type). Use sizeof(varname) instead if possible') 3448 return 3449 3450 remainder = line[matched.end(0):] 3451 3452 # The close paren is for function pointers as arguments to a function. 3453 # eg, void foo(void (*bar)(int)); 3454 # The semicolon check is a more basic function check; also possibly a 3455 # function pointer typedef. 3456 # eg, void foo(int); or void foo(int) const; 3457 # The equals check is for function pointer assignment. 3458 # eg, void *(*foo)(int) = ... 3459 # 3460 # Right now, this will only catch cases where there's a single argument, and 3461 # it's unnamed. It should probably be expanded to check for multiple 3462 # arguments with some unnamed. 3463 function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder) 3464 if function_match: 3465 if (not function_match.group(3) 3466 or function_match.group(3) == ';' 3467 or raw_line.find('/*') < 0): 3468 error(line_number, 'readability/function', 3, 3469 'All parameters should be named in a function') 3470 return 3471 3472 # At this point, all that should be left is actual casts. 3473 error(line_number, 'readability/casting', 4, 3474 'Using C-style cast. Use %s<%s>(...) instead' % 3475 (cast_type, matched.group(1))) 3476 3477 3478 _HEADERS_CONTAINING_TEMPLATES = ( 3479 ('<deque>', ('deque',)), 3480 ('<functional>', ('unary_function', 'binary_function', 3481 'plus', 'minus', 'multiplies', 'divides', 'modulus', 3482 'negate', 3483 'equal_to', 'not_equal_to', 'greater', 'less', 3484 'greater_equal', 'less_equal', 3485 'logical_and', 'logical_or', 'logical_not', 3486 'unary_negate', 'not1', 'binary_negate', 'not2', 3487 'bind1st', 'bind2nd', 3488 'pointer_to_unary_function', 3489 'pointer_to_binary_function', 3490 'ptr_fun', 3491 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', 3492 'mem_fun_ref_t', 3493 'const_mem_fun_t', 'const_mem_fun1_t', 3494 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', 3495 'mem_fun_ref', 3496 )), 3497 ('<limits>', ('numeric_limits',)), 3498 ('<list>', ('list',)), 3499 ('<map>', ('map', 'multimap',)), 3500 ('<memory>', ('allocator',)), 3501 ('<queue>', ('queue', 'priority_queue',)), 3502 ('<set>', ('set', 'multiset',)), 3503 ('<stack>', ('stack',)), 3504 ('<string>', ('char_traits', 'basic_string',)), 3505 ('<utility>', ('pair',)), 3506 ('<vector>', ('vector',)), 3507 3508 # gcc extensions. 3509 # Note: std::hash is their hash, ::hash is our hash 3510 ('<hash_map>', ('hash_map', 'hash_multimap',)), 3511 ('<hash_set>', ('hash_set', 'hash_multiset',)), 3512 ('<slist>', ('slist',)), 3513 ) 3514 3515 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = { 3516 # We can trust with reasonable confidence that map gives us pair<>, too. 3517 'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap') 3518 } 3519 3520 _RE_PATTERN_STRING = re.compile(r'\bstring\b') 3521 3522 _re_pattern_algorithm_header = [] 3523 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap', 3524 'transform'): 3525 # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or 3526 # type::max(). 3527 _re_pattern_algorithm_header.append( 3528 (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), 3529 _template, 3530 '<algorithm>')) 3531 3532 _re_pattern_templates = [] 3533 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: 3534 for _template in _templates: 3535 _re_pattern_templates.append( 3536 (re.compile(r'(\<|\b)' + _template + r'\s*\<'), 3537 _template + '<>', 3538 _header)) 3539 3540 3541 def files_belong_to_same_module(filename_cpp, filename_h): 3542 """Check if these two filenames belong to the same module. 3543 3544 The concept of a 'module' here is a as follows: 3545 foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the 3546 same 'module' if they are in the same directory. 3547 some/path/public/xyzzy and some/path/internal/xyzzy are also considered 3548 to belong to the same module here. 3549 3550 If the filename_cpp contains a longer path than the filename_h, for example, 3551 '/absolute/path/to/base/sysinfo.cpp', and this file would include 3552 'base/sysinfo.h', this function also produces the prefix needed to open the 3553 header. This is used by the caller of this function to more robustly open the 3554 header file. We don't have access to the real include paths in this context, 3555 so we need this guesswork here. 3556 3557 Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module 3558 according to this implementation. Because of this, this function gives 3559 some false positives. This should be sufficiently rare in practice. 3560 3561 Args: 3562 filename_cpp: is the path for the .cpp file 3563 filename_h: is the path for the header path 3564 3565 Returns: 3566 Tuple with a bool and a string: 3567 bool: True if filename_cpp and filename_h belong to the same module. 3568 string: the additional prefix needed to open the header file. 3569 """ 3570 3571 if not filename_cpp.endswith('.cpp'): 3572 return (False, '') 3573 filename_cpp = filename_cpp[:-len('.cpp')] 3574 if filename_cpp.endswith('_unittest'): 3575 filename_cpp = filename_cpp[:-len('_unittest')] 3576 elif filename_cpp.endswith('_test'): 3577 filename_cpp = filename_cpp[:-len('_test')] 3578 filename_cpp = filename_cpp.replace('/public/', '/') 3579 filename_cpp = filename_cpp.replace('/internal/', '/') 3580 3581 if not filename_h.endswith('.h'): 3582 return (False, '') 3583 filename_h = filename_h[:-len('.h')] 3584 if filename_h.endswith('-inl'): 3585 filename_h = filename_h[:-len('-inl')] 3586 filename_h = filename_h.replace('/public/', '/') 3587 filename_h = filename_h.replace('/internal/', '/') 3588 3589 files_belong_to_same_module = filename_cpp.endswith(filename_h) 3590 common_path = '' 3591 if files_belong_to_same_module: 3592 common_path = filename_cpp[:-len(filename_h)] 3593 return files_belong_to_same_module, common_path 3594 3595 3596 def update_include_state(filename, include_state, io=codecs): 3597 """Fill up the include_state with new includes found from the file. 3598 3599 Args: 3600 filename: the name of the header to read. 3601 include_state: an _IncludeState instance in which the headers are inserted. 3602 io: The io factory to use to read the file. Provided for testability. 3603 3604 Returns: 3605 True if a header was succesfully added. False otherwise. 3606 """ 3607 io = _unit_test_config.get(INCLUDE_IO_INJECTION_KEY, codecs) 3608 header_file = None 3609 try: 3610 header_file = io.open(filename, 'r', 'utf8', 'replace') 3611 except IOError: 3612 return False 3613 line_number = 0 3614 for line in header_file: 3615 line_number += 1 3616 clean_line = cleanse_comments(line) 3617 matched = _RE_PATTERN_INCLUDE.search(clean_line) 3618 if matched: 3619 include = matched.group(2) 3620 # The value formatting is cute, but not really used right now. 3621 # What matters here is that the key is in include_state. 3622 include_state.setdefault(include, '%s:%d' % (filename, line_number)) 3623 return True 3624 3625 3626 def check_for_include_what_you_use(filename, clean_lines, include_state, error): 3627 """Reports for missing stl includes. 3628 3629 This function will output warnings to make sure you are including the headers 3630 necessary for the stl containers and functions that you use. We only give one 3631 reason to include a header. For example, if you use both equal_to<> and 3632 less<> in a .h file, only one (the latter in the file) of these will be 3633 reported as a reason to include the <functional>. 3634 3635 Args: 3636 filename: The name of the current file. 3637 clean_lines: A CleansedLines instance containing the file. 3638 include_state: An _IncludeState instance. 3639 error: The function to call with any errors found. 3640 """ 3641 required = {} # A map of header name to line_number and the template entity. 3642 # Example of required: { '<functional>': (1219, 'less<>') } 3643 3644 for line_number in xrange(clean_lines.num_lines()): 3645 line = clean_lines.elided[line_number] 3646 if not line or line[0] == '#': 3647 continue 3648 3649 # String is special -- it is a non-templatized type in STL. 3650 if _RE_PATTERN_STRING.search(line): 3651 required['<string>'] = (line_number, 'string') 3652 3653 for pattern, template, header in _re_pattern_algorithm_header: 3654 if pattern.search(line): 3655 required[header] = (line_number, template) 3656 3657 # The following function is just a speed up, no semantics are changed. 3658 if not '<' in line: # Reduces the cpu time usage by skipping lines. 3659 continue 3660 3661 for pattern, template, header in _re_pattern_templates: 3662 if pattern.search(line): 3663 required[header] = (line_number, template) 3664 3665 # The policy is that if you #include something in foo.h you don't need to 3666 # include it again in foo.cpp. Here, we will look at possible includes. 3667 # Let's copy the include_state so it is only messed up within this function. 3668 include_state = include_state.copy() 3669 3670 # Did we find the header for this file (if any) and succesfully load it? 3671 header_found = False 3672 3673 # Use the absolute path so that matching works properly. 3674 abs_filename = os.path.abspath(filename) 3675 3676 # For Emacs's flymake. 3677 # If cpp_style is invoked from Emacs's flymake, a temporary file is generated 3678 # by flymake and that file name might end with '_flymake.cpp'. In that case, 3679 # restore original file name here so that the corresponding header file can be 3680 # found. 3681 # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h' 3682 # instead of 'foo_flymake.h' 3683 abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename) 3684 3685 # include_state is modified during iteration, so we iterate over a copy of 3686 # the keys. 3687 for header in include_state.keys(): #NOLINT 3688 (same_module, common_path) = files_belong_to_same_module(abs_filename, header) 3689 fullpath = common_path + header 3690 if same_module and update_include_state(fullpath, include_state): 3691 header_found = True 3692 3693 # If we can't find the header file for a .cpp, assume it's because we don't 3694 # know where to look. In that case we'll give up as we're not sure they 3695 # didn't include it in the .h file. 3696 # FIXME: Do a better job of finding .h files so we are confident that 3697 # not having the .h file means there isn't one. 3698 if filename.endswith('.cpp') and not header_found: 3699 return 3700 3701 # All the lines have been processed, report the errors found. 3702 for required_header_unstripped in required: 3703 template = required[required_header_unstripped][1] 3704 if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED: 3705 headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template] 3706 if [True for header in headers if header in include_state]: 3707 continue 3708 if required_header_unstripped.strip('<>"') not in include_state: 3709 error(required[required_header_unstripped][0], 3710 'build/include_what_you_use', 4, 3711 'Add #include ' + required_header_unstripped + ' for ' + template) 3712 3713 3714 def process_line(filename, file_extension, 3715 clean_lines, line, include_state, function_state, 3716 class_state, file_state, enum_state, error): 3717 """Processes a single line in the file. 3718 3719 Args: 3720 filename: Filename of the file that is being processed. 3721 file_extension: The extension (dot not included) of the file. 3722 clean_lines: An array of strings, each representing a line of the file, 3723 with comments stripped. 3724 line: Number of line being processed. 3725 include_state: An _IncludeState instance in which the headers are inserted. 3726 function_state: A _FunctionState instance which counts function lines, etc. 3727 class_state: A _ClassState instance which maintains information about 3728 the current stack of nested class declarations being parsed. 3729 file_state: A _FileState instance which maintains information about 3730 the state of things in the file. 3731 enum_state: A _EnumState instance which maintains an enum declaration 3732 state. 3733 error: A callable to which errors are reported, which takes arguments: 3734 line number, error level, and message 3735 3736 """ 3737 raw_lines = clean_lines.raw_lines 3738 detect_functions(clean_lines, line, function_state, error) 3739 check_for_function_lengths(clean_lines, line, function_state, error) 3740 if search(r'\bNOLINT\b', raw_lines[line]): # ignore nolint lines 3741 return 3742 if match(r'\s*\b__asm\b', raw_lines[line]): # Ignore asm lines as they format differently. 3743 return 3744 check_function_definition(filename, file_extension, clean_lines, line, function_state, error) 3745 check_pass_ptr_usage(clean_lines, line, function_state, error) 3746 check_for_leaky_patterns(clean_lines, line, function_state, error) 3747 check_for_multiline_comments_and_strings(clean_lines, line, error) 3748 check_style(clean_lines, line, file_extension, class_state, file_state, enum_state, error) 3749 check_language(filename, clean_lines, line, file_extension, include_state, 3750 file_state, error) 3751 check_for_non_standard_constructs(clean_lines, line, class_state, error) 3752 check_posix_threading(clean_lines, line, error) 3753 check_invalid_increment(clean_lines, line, error) 3754 check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line, error) 3755 3756 def _process_lines(filename, file_extension, lines, error, min_confidence): 3757 """Performs lint checks and reports any errors to the given error function. 3758 3759 Args: 3760 filename: Filename of the file that is being processed. 3761 file_extension: The extension (dot not included) of the file. 3762 lines: An array of strings, each representing a line of the file, with the 3763 last element being empty if the file is termined with a newline. 3764 error: A callable to which errors are reported, which takes 4 arguments: 3765 """ 3766 lines = (['// marker so line numbers and indices both start at 1'] + lines + 3767 ['// marker so line numbers end in a known way']) 3768 3769 include_state = _IncludeState() 3770 function_state = _FunctionState(min_confidence) 3771 class_state = _ClassState() 3772 3773 check_for_copyright(lines, error) 3774 3775 if file_extension == 'h': 3776 check_for_header_guard(filename, lines, error) 3777 3778 remove_multi_line_comments(lines, error) 3779 clean_lines = CleansedLines(lines) 3780 file_state = _FileState(clean_lines, file_extension) 3781 enum_state = _EnumState() 3782 for line in xrange(clean_lines.num_lines()): 3783 process_line(filename, file_extension, clean_lines, line, 3784 include_state, function_state, class_state, file_state, 3785 enum_state, error) 3786 class_state.check_finished(error) 3787 3788 check_for_include_what_you_use(filename, clean_lines, include_state, error) 3789 3790 # We check here rather than inside process_line so that we see raw 3791 # lines rather than "cleaned" lines. 3792 check_for_unicode_replacement_characters(lines, error) 3793 3794 check_for_new_line_at_eof(lines, error) 3795 3796 3797 class CppChecker(object): 3798 3799 """Processes C++ lines for checking style.""" 3800 3801 # This list is used to-- 3802 # 3803 # (1) generate an explicit list of all possible categories, 3804 # (2) unit test that all checked categories have valid names, and 3805 # (3) unit test that all categories are getting unit tested. 3806 # 3807 categories = set([ 3808 'build/class', 3809 'build/deprecated', 3810 'build/endif_comment', 3811 'build/forward_decl', 3812 'build/header_guard', 3813 'build/include', 3814 'build/include_order', 3815 'build/include_what_you_use', 3816 'build/namespaces', 3817 'build/printf_format', 3818 'build/storage_class', 3819 'build/using_std', 3820 'legal/copyright', 3821 'readability/braces', 3822 'readability/casting', 3823 'readability/check', 3824 'readability/comparison_to_zero', 3825 'readability/constructors', 3826 'readability/control_flow', 3827 'readability/enum_casing', 3828 'readability/fn_size', 3829 'readability/function', 3830 'readability/multiline_comment', 3831 'readability/multiline_string', 3832 'readability/parameter_name', 3833 'readability/naming', 3834 'readability/naming/underscores', 3835 'readability/null', 3836 'readability/pass_ptr', 3837 'readability/streams', 3838 'readability/todo', 3839 'readability/utf8', 3840 'readability/webkit_export', 3841 'runtime/arrays', 3842 'runtime/bitfields', 3843 'runtime/casting', 3844 'runtime/ctype_function', 3845 'runtime/explicit', 3846 'runtime/init', 3847 'runtime/int', 3848 'runtime/invalid_increment', 3849 'runtime/leaky_pattern', 3850 'runtime/max_min_macros', 3851 'runtime/memset', 3852 'runtime/printf', 3853 'runtime/printf_format', 3854 'runtime/references', 3855 'runtime/rtti', 3856 'runtime/sizeof', 3857 'runtime/string', 3858 'runtime/threadsafe_fn', 3859 'runtime/unsigned', 3860 'runtime/virtual', 3861 'whitespace/blank_line', 3862 'whitespace/braces', 3863 'whitespace/comma', 3864 'whitespace/comments', 3865 'whitespace/declaration', 3866 'whitespace/end_of_line', 3867 'whitespace/ending_newline', 3868 'whitespace/indent', 3869 'whitespace/line_length', 3870 'whitespace/newline', 3871 'whitespace/operators', 3872 'whitespace/parens', 3873 'whitespace/semicolon', 3874 'whitespace/tab', 3875 'whitespace/todo', 3876 ]) 3877 3878 def __init__(self, file_path, file_extension, handle_style_error, 3879 min_confidence): 3880 """Create a CppChecker instance. 3881 3882 Args: 3883 file_extension: A string that is the file extension, without 3884 the leading dot. 3885 3886 """ 3887 self.file_extension = file_extension 3888 self.file_path = file_path 3889 self.handle_style_error = handle_style_error 3890 self.min_confidence = min_confidence 3891 3892 # Useful for unit testing. 3893 def __eq__(self, other): 3894 """Return whether this CppChecker instance is equal to another.""" 3895 if self.file_extension != other.file_extension: 3896 return False 3897 if self.file_path != other.file_path: 3898 return False 3899 if self.handle_style_error != other.handle_style_error: 3900 return False 3901 if self.min_confidence != other.min_confidence: 3902 return False 3903 3904 return True 3905 3906 # Useful for unit testing. 3907 def __ne__(self, other): 3908 # Python does not automatically deduce __ne__() from __eq__(). 3909 return not self.__eq__(other) 3910 3911 def check(self, lines): 3912 _process_lines(self.file_path, self.file_extension, lines, 3913 self.handle_style_error, self.min_confidence) 3914 3915 3916 # FIXME: Remove this function (requires refactoring unit tests). 3917 def process_file_data(filename, file_extension, lines, error, min_confidence, unit_test_config): 3918 global _unit_test_config 3919 _unit_test_config = unit_test_config 3920 checker = CppChecker(filename, file_extension, error, min_confidence) 3921 checker.check(lines) 3922 _unit_test_config = {} 3923