1 # -*- coding: utf-8 -*- 2 # 3 # Copyright (C) 2009, 2010, 2012 Google Inc. All rights reserved. 4 # Copyright (C) 2009 Torch Mobile Inc. 5 # Copyright (C) 2009 Apple Inc. All rights reserved. 6 # Copyright (C) 2010 Chris Jerdonek (cjerdonek (at] webkit.org) 7 # 8 # Redistribution and use in source and binary forms, with or without 9 # modification, are permitted provided that the following conditions are 10 # met: 11 # 12 # * Redistributions of source code must retain the above copyright 13 # notice, this list of conditions and the following disclaimer. 14 # * Redistributions in binary form must reproduce the above 15 # copyright notice, this list of conditions and the following disclaimer 16 # in the documentation and/or other materials provided with the 17 # distribution. 18 # * Neither the name of Google Inc. nor the names of its 19 # contributors may be used to endorse or promote products derived from 20 # this software without specific prior written permission. 21 # 22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 34 # This is the modified version of Google's cpplint. The original code is 35 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py 36 37 """Support for check-webkit-style.""" 38 39 import math # for log 40 import os 41 import os.path 42 import re 43 import sre_compile 44 import string 45 import sys 46 import unicodedata 47 48 from webkitpy.common.memoized import memoized 49 from webkitpy.common.system.filesystem import FileSystem 50 51 # Headers that we consider STL headers. 52 _STL_HEADERS = frozenset([ 53 'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception', 54 'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set', 55 'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h', 56 'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack', 57 'stl_alloc.h', 'stl_relops.h', 'type_traits.h', 58 'utility', 'vector', 'vector.h', 59 ]) 60 61 62 # Non-STL C++ system headers. 63 _CPP_HEADERS = frozenset([ 64 'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype', 65 'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath', 66 'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef', 67 'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype', 68 'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream', 69 'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip', 70 'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h', 71 'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h', 72 'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h', 73 'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h', 74 'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept', 75 'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string', 76 'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray', 77 ]) 78 79 80 # Assertion macros. These are defined in base/logging.h and 81 # testing/base/gunit.h. Note that the _M versions need to come first 82 # for substring matching to work. 83 _CHECK_MACROS = [ 84 'DCHECK', 'CHECK', 85 'EXPECT_TRUE_M', 'EXPECT_TRUE', 86 'ASSERT_TRUE_M', 'ASSERT_TRUE', 87 'EXPECT_FALSE_M', 'EXPECT_FALSE', 88 'ASSERT_FALSE_M', 'ASSERT_FALSE', 89 ] 90 91 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE 92 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) 93 94 for op, replacement in [('==', 'EQ'), ('!=', 'NE'), 95 ('>=', 'GE'), ('>', 'GT'), 96 ('<=', 'LE'), ('<', 'LT')]: 97 _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement 98 _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement 99 _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement 100 _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement 101 _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement 102 _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement 103 104 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), 105 ('>=', 'LT'), ('>', 'LE'), 106 ('<=', 'GT'), ('<', 'GE')]: 107 _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement 108 _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement 109 _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement 110 _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement 111 112 113 # These constants define types of headers for use with 114 # _IncludeState.check_next_include_order(). 115 _CONFIG_HEADER = 0 116 _PRIMARY_HEADER = 1 117 _OTHER_HEADER = 2 118 _MOC_HEADER = 3 119 120 121 # The regexp compilation caching is inlined in all regexp functions for 122 # performance reasons; factoring it out into a separate function turns out 123 # to be noticeably expensive. 124 _regexp_compile_cache = {} 125 126 127 def match(pattern, s): 128 """Matches the string with the pattern, caching the compiled regexp.""" 129 if not pattern in _regexp_compile_cache: 130 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 131 return _regexp_compile_cache[pattern].match(s) 132 133 134 def search(pattern, s): 135 """Searches the string for the pattern, caching the compiled regexp.""" 136 if not pattern in _regexp_compile_cache: 137 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 138 return _regexp_compile_cache[pattern].search(s) 139 140 141 def sub(pattern, replacement, s): 142 """Substitutes occurrences of a pattern, caching the compiled regexp.""" 143 if not pattern in _regexp_compile_cache: 144 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 145 return _regexp_compile_cache[pattern].sub(replacement, s) 146 147 148 def subn(pattern, replacement, s): 149 """Substitutes occurrences of a pattern, caching the compiled regexp.""" 150 if not pattern in _regexp_compile_cache: 151 _regexp_compile_cache[pattern] = sre_compile.compile(pattern) 152 return _regexp_compile_cache[pattern].subn(replacement, s) 153 154 155 def iteratively_replace_matches_with_char(pattern, char_replacement, s): 156 """Returns the string with replacement done. 157 158 Every character in the match is replaced with char. 159 Due to the iterative nature, pattern should not match char or 160 there will be an infinite loop. 161 162 Example: 163 pattern = r'<[^>]>' # template parameters 164 char_replacement = '_' 165 s = 'A<B<C, D>>' 166 Returns 'A_________' 167 168 Args: 169 pattern: The regex to match. 170 char_replacement: The character to put in place of every 171 character of the match. 172 s: The string on which to do the replacements. 173 174 Returns: 175 True, if the given line is blank. 176 """ 177 while True: 178 matched = search(pattern, s) 179 if not matched: 180 return s 181 start_match_index = matched.start(0) 182 end_match_index = matched.end(0) 183 match_length = end_match_index - start_match_index 184 s = s[:start_match_index] + char_replacement * match_length + s[end_match_index:] 185 186 187 def _find_in_lines(regex, lines, start_position, not_found_position): 188 """Does a find starting at start position and going forward until 189 a match is found. 190 191 Returns the position where the regex started. 192 """ 193 current_row = start_position.row 194 195 # Start with the given row and trim off everything before what should be matched. 196 current_line = lines[start_position.row][start_position.column:] 197 starting_offset = start_position.column 198 while True: 199 found_match = search(regex, current_line) 200 if found_match: 201 return Position(current_row, starting_offset + found_match.start()) 202 203 # A match was not found so continue forward. 204 current_row += 1 205 starting_offset = 0 206 if current_row >= len(lines): 207 return not_found_position 208 current_line = lines[current_row] 209 210 def _rfind_in_lines(regex, lines, start_position, not_found_position): 211 """Does a reverse find starting at start position and going backwards until 212 a match is found. 213 214 Returns the position where the regex ended. 215 """ 216 # Put the regex in a group and proceed it with a greedy expression that 217 # matches anything to ensure that we get the last possible match in a line. 218 last_in_line_regex = r'.*(' + regex + ')' 219 current_row = start_position.row 220 221 # Start with the given row and trim off everything past what may be matched. 222 current_line = lines[start_position.row][:start_position.column] 223 while True: 224 found_match = match(last_in_line_regex, current_line) 225 if found_match: 226 return Position(current_row, found_match.end(1)) 227 228 # A match was not found so continue backward. 229 current_row -= 1 230 if current_row < 0: 231 return not_found_position 232 current_line = lines[current_row] 233 234 235 def _convert_to_lower_with_underscores(text): 236 """Converts all text strings in camelCase or PascalCase to lowers with underscores.""" 237 238 # First add underscores before any capital letter followed by a lower case letter 239 # as long as it is in a word. 240 # (This put an underscore before Password but not P and A in WPAPassword). 241 text = sub(r'(?<=[A-Za-z0-9])([A-Z])(?=[a-z])', r'_\1', text) 242 243 # Next add underscores before capitals at the end of words if it was 244 # preceeded by lower case letter or number. 245 # (This puts an underscore before A in isA but not A in CBA). 246 text = sub(r'(?<=[a-z0-9])([A-Z])(?=\b)', r'_\1', text) 247 248 # Next add underscores when you have a captial letter which is followed by a capital letter 249 # but is not proceeded by one. (This puts an underscore before A in 'WordADay'). 250 text = sub(r'(?<=[a-z0-9])([A-Z][A-Z_])', r'_\1', text) 251 252 return text.lower() 253 254 255 256 def _create_acronym(text): 257 """Creates an acronym for the given text.""" 258 # Removes all lower case letters except those starting words. 259 text = sub(r'(?<!\b)[a-z]', '', text) 260 return text.upper() 261 262 263 def up_to_unmatched_closing_paren(s): 264 """Splits a string into two parts up to first unmatched ')'. 265 266 Args: 267 s: a string which is a substring of line after '(' 268 (e.g., "a == (b + c))"). 269 270 Returns: 271 A pair of strings (prefix before first unmatched ')', 272 remainder of s after first unmatched ')'), e.g., 273 up_to_unmatched_closing_paren("a == (b + c)) { ") 274 returns "a == (b + c)", " {". 275 Returns None, None if there is no unmatched ')' 276 277 """ 278 i = 1 279 for pos, c in enumerate(s): 280 if c == '(': 281 i += 1 282 elif c == ')': 283 i -= 1 284 if i == 0: 285 return s[:pos], s[pos + 1:] 286 return None, None 287 288 class _IncludeState(dict): 289 """Tracks line numbers for includes, and the order in which includes appear. 290 291 As a dict, an _IncludeState object serves as a mapping between include 292 filename and line number on which that file was included. 293 294 Call check_next_include_order() once for each header in the file, passing 295 in the type constants defined above. Calls in an illegal order will 296 raise an _IncludeError with an appropriate error message. 297 298 """ 299 # self._section will move monotonically through this set. If it ever 300 # needs to move backwards, check_next_include_order will raise an error. 301 _INITIAL_SECTION = 0 302 _CONFIG_SECTION = 1 303 _PRIMARY_SECTION = 2 304 _OTHER_SECTION = 3 305 306 _TYPE_NAMES = { 307 _CONFIG_HEADER: 'WebCore config.h', 308 _PRIMARY_HEADER: 'header this file implements', 309 _OTHER_HEADER: 'other header', 310 _MOC_HEADER: 'moc file', 311 } 312 _SECTION_NAMES = { 313 _INITIAL_SECTION: "... nothing.", 314 _CONFIG_SECTION: "WebCore config.h.", 315 _PRIMARY_SECTION: 'a header this file implements.', 316 _OTHER_SECTION: 'other header.', 317 } 318 319 def __init__(self): 320 dict.__init__(self) 321 self._section = self._INITIAL_SECTION 322 self._visited_primary_section = False 323 self.header_types = dict(); 324 325 def visited_primary_section(self): 326 return self._visited_primary_section 327 328 def check_next_include_order(self, header_type, file_is_header, primary_header_exists): 329 """Returns a non-empty error message if the next header is out of order. 330 331 This function also updates the internal state to be ready to check 332 the next include. 333 334 Args: 335 header_type: One of the _XXX_HEADER constants defined above. 336 file_is_header: Whether the file that owns this _IncludeState is itself a header 337 338 Returns: 339 The empty string if the header is in the right order, or an 340 error message describing what's wrong. 341 342 """ 343 if header_type == _CONFIG_HEADER and file_is_header: 344 return 'Header file should not contain WebCore config.h.' 345 if header_type == _PRIMARY_HEADER and file_is_header: 346 return 'Header file should not contain itself.' 347 if header_type == _MOC_HEADER: 348 return '' 349 350 error_message = '' 351 if self._section != self._OTHER_SECTION: 352 before_error_message = ('Found %s before %s' % 353 (self._TYPE_NAMES[header_type], 354 self._SECTION_NAMES[self._section + 1])) 355 after_error_message = ('Found %s after %s' % 356 (self._TYPE_NAMES[header_type], 357 self._SECTION_NAMES[self._section])) 358 359 if header_type == _CONFIG_HEADER: 360 if self._section >= self._CONFIG_SECTION: 361 error_message = after_error_message 362 self._section = self._CONFIG_SECTION 363 elif header_type == _PRIMARY_HEADER: 364 if self._section >= self._PRIMARY_SECTION: 365 error_message = after_error_message 366 elif self._section < self._CONFIG_SECTION: 367 error_message = before_error_message 368 self._section = self._PRIMARY_SECTION 369 self._visited_primary_section = True 370 else: 371 assert header_type == _OTHER_HEADER 372 if not file_is_header and self._section < self._PRIMARY_SECTION: 373 if primary_header_exists: 374 error_message = before_error_message 375 self._section = self._OTHER_SECTION 376 377 return error_message 378 379 380 class Position(object): 381 """Holds the position of something.""" 382 def __init__(self, row, column): 383 self.row = row 384 self.column = column 385 386 def __str__(self): 387 return '(%s, %s)' % (self.row, self.column) 388 389 def __cmp__(self, other): 390 return self.row.__cmp__(other.row) or self.column.__cmp__(other.column) 391 392 393 class Parameter(object): 394 """Information about one function parameter.""" 395 def __init__(self, parameter, parameter_name_index, row): 396 self.type = parameter[:parameter_name_index].strip() 397 # Remove any initializers from the parameter name (e.g. int i = 5). 398 self.name = sub(r'=.*', '', parameter[parameter_name_index:]).strip() 399 self.row = row 400 401 @memoized 402 def lower_with_underscores_name(self): 403 """Returns the parameter name in the lower with underscores format.""" 404 return _convert_to_lower_with_underscores(self.name) 405 406 407 class SingleLineView(object): 408 """Converts multiple lines into a single line (with line breaks replaced by a 409 space) to allow for easier searching.""" 410 def __init__(self, lines, start_position, end_position): 411 """Create a SingleLineView instance. 412 413 Args: 414 lines: a list of multiple lines to combine into a single line. 415 start_position: offset within lines of where to start the single line. 416 end_position: just after where to end (like a slice operation). 417 """ 418 # Get the rows of interest. 419 trimmed_lines = lines[start_position.row:end_position.row + 1] 420 421 # Remove the columns on the last line that aren't included. 422 trimmed_lines[-1] = trimmed_lines[-1][:end_position.column] 423 424 # Remove the columns on the first line that aren't included. 425 trimmed_lines[0] = trimmed_lines[0][start_position.column:] 426 427 # Create a single line with all of the parameters. 428 self.single_line = ' '.join(trimmed_lines) 429 430 # Keep the row lengths, so we can calculate the original row number 431 # given a column in the single line (adding 1 due to the space added 432 # during the join). 433 self._row_lengths = [len(line) + 1 for line in trimmed_lines] 434 self._starting_row = start_position.row 435 436 def convert_column_to_row(self, single_line_column_number): 437 """Convert the column number from the single line into the original 438 line number. 439 440 Special cases: 441 * Columns in the added spaces are considered part of the previous line. 442 * Columns beyond the end of the line are consider part the last line 443 in the view.""" 444 total_columns = 0 445 row_offset = 0 446 while row_offset < len(self._row_lengths) - 1 and single_line_column_number >= total_columns + self._row_lengths[row_offset]: 447 total_columns += self._row_lengths[row_offset] 448 row_offset += 1 449 return self._starting_row + row_offset 450 451 452 def create_skeleton_parameters(all_parameters): 453 """Converts a parameter list to a skeleton version. 454 455 The skeleton only has one word for the parameter name, one word for the type, 456 and commas after each parameter and only there. Everything in the skeleton 457 remains in the same columns as the original.""" 458 all_simplifications = ( 459 # Remove template parameters, function declaration parameters, etc. 460 r'(<[^<>]*?>)|(\([^\(\)]*?\))|(\{[^\{\}]*?\})', 461 # Remove all initializers. 462 r'=[^,]*', 463 # Remove :: and everything before it. 464 r'[^,]*::', 465 # Remove modifiers like &, *. 466 r'[&*]', 467 # Remove const modifiers. 468 r'\bconst\s+(?=[A-Za-z])', 469 # Remove numerical modifiers like long. 470 r'\b(unsigned|long|short)\s+(?=unsigned|long|short|int|char|double|float)') 471 472 skeleton_parameters = all_parameters 473 for simplification in all_simplifications: 474 skeleton_parameters = iteratively_replace_matches_with_char(simplification, ' ', skeleton_parameters) 475 # If there are any parameters, then add a , after the last one to 476 # make a regular pattern of a , following every parameter. 477 if skeleton_parameters.strip(): 478 skeleton_parameters += ',' 479 return skeleton_parameters 480 481 482 def find_parameter_name_index(skeleton_parameter): 483 """Determines where the parametere name starts given the skeleton parameter.""" 484 # The first space from the right in the simplified parameter is where the parameter 485 # name starts unless the first space is before any content in the simplified parameter. 486 before_name_index = skeleton_parameter.rstrip().rfind(' ') 487 if before_name_index != -1 and skeleton_parameter[:before_name_index].strip(): 488 return before_name_index + 1 489 return len(skeleton_parameter) 490 491 492 def parameter_list(elided_lines, start_position, end_position): 493 """Generator for a function's parameters.""" 494 # Create new positions that omit the outer parenthesis of the parameters. 495 start_position = Position(row=start_position.row, column=start_position.column + 1) 496 end_position = Position(row=end_position.row, column=end_position.column - 1) 497 single_line_view = SingleLineView(elided_lines, start_position, end_position) 498 skeleton_parameters = create_skeleton_parameters(single_line_view.single_line) 499 end_index = -1 500 501 while True: 502 # Find the end of the next parameter. 503 start_index = end_index + 1 504 end_index = skeleton_parameters.find(',', start_index) 505 506 # No comma means that all parameters have been parsed. 507 if end_index == -1: 508 return 509 row = single_line_view.convert_column_to_row(end_index) 510 511 # Parse the parameter into a type and parameter name. 512 skeleton_parameter = skeleton_parameters[start_index:end_index] 513 name_offset = find_parameter_name_index(skeleton_parameter) 514 parameter = single_line_view.single_line[start_index:end_index] 515 yield Parameter(parameter, name_offset, row) 516 517 518 class _FunctionState(object): 519 """Tracks current function name and the number of lines in its body. 520 521 Attributes: 522 min_confidence: The minimum confidence level to use while checking style. 523 524 """ 525 526 _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. 527 _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. 528 529 def __init__(self, min_confidence): 530 self.min_confidence = min_confidence 531 self.current_function = '' 532 self.in_a_function = False 533 self.lines_in_function = 0 534 # Make sure these will not be mistaken for real positions (even when a 535 # small amount is added to them). 536 self.body_start_position = Position(-1000, 0) 537 self.end_position = Position(-1000, 0) 538 539 def begin(self, function_name, function_name_start_position, body_start_position, end_position, 540 parameter_start_position, parameter_end_position, clean_lines): 541 """Start analyzing function body. 542 543 Args: 544 function_name: The name of the function being tracked. 545 function_name_start_position: Position in elided where the function name starts. 546 body_start_position: Position in elided of the { or the ; for a prototype. 547 end_position: Position in elided just after the final } (or ; is. 548 parameter_start_position: Position in elided of the '(' for the parameters. 549 parameter_end_position: Position in elided just after the ')' for the parameters. 550 clean_lines: A CleansedLines instance containing the file. 551 """ 552 self.in_a_function = True 553 self.lines_in_function = -1 # Don't count the open brace line. 554 self.current_function = function_name 555 self.function_name_start_position = function_name_start_position 556 self.body_start_position = body_start_position 557 self.end_position = end_position 558 self.is_declaration = clean_lines.elided[body_start_position.row][body_start_position.column] == ';' 559 self.parameter_start_position = parameter_start_position 560 self.parameter_end_position = parameter_end_position 561 self.is_pure = False 562 if self.is_declaration: 563 characters_after_parameters = SingleLineView(clean_lines.elided, parameter_end_position, body_start_position).single_line 564 self.is_pure = bool(match(r'\s*=\s*0\s*', characters_after_parameters)) 565 self._clean_lines = clean_lines 566 self._parameter_list = None 567 568 def modifiers_and_return_type(self): 569 """Returns the modifiers and the return type.""" 570 # Go backwards from where the function name is until we encounter one of several things: 571 # ';' or '{' or '}' or 'private:', etc. or '#' or return Position(0, 0) 572 elided = self._clean_lines.elided 573 start_modifiers = _rfind_in_lines(r';|\{|\}|((private|public|protected):)|(#.*)', 574 elided, self.parameter_start_position, Position(0, 0)) 575 return SingleLineView(elided, start_modifiers, self.function_name_start_position).single_line.strip() 576 577 def parameter_list(self): 578 if not self._parameter_list: 579 # Store the final result as a tuple since that is immutable. 580 self._parameter_list = tuple(parameter_list(self._clean_lines.elided, self.parameter_start_position, self.parameter_end_position)) 581 582 return self._parameter_list 583 584 def count(self, line_number): 585 """Count line in current function body.""" 586 if self.in_a_function and line_number >= self.body_start_position.row: 587 self.lines_in_function += 1 588 589 def check(self, error, line_number): 590 """Report if too many lines in function body. 591 592 Args: 593 error: The function to call with any errors found. 594 line_number: The number of the line to check. 595 """ 596 if match(r'T(EST|est)', self.current_function): 597 base_trigger = self._TEST_TRIGGER 598 else: 599 base_trigger = self._NORMAL_TRIGGER 600 trigger = base_trigger * 2 ** self.min_confidence 601 602 if self.lines_in_function > trigger: 603 error_level = int(math.log(self.lines_in_function / base_trigger, 2)) 604 # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... 605 if error_level > 5: 606 error_level = 5 607 error(line_number, 'readability/fn_size', error_level, 608 'Small and focused functions are preferred:' 609 ' %s has %d non-comment lines' 610 ' (error triggered by exceeding %d lines).' % ( 611 self.current_function, self.lines_in_function, trigger)) 612 613 def end(self): 614 """Stop analyzing function body.""" 615 self.in_a_function = False 616 617 618 class _IncludeError(Exception): 619 """Indicates a problem with the include order in a file.""" 620 pass 621 622 623 class FileInfo: 624 """Provides utility functions for filenames. 625 626 FileInfo provides easy access to the components of a file's path 627 relative to the project root. 628 """ 629 630 def __init__(self, filename): 631 self._filename = filename 632 633 def full_name(self): 634 """Make Windows paths like Unix.""" 635 return os.path.abspath(self._filename).replace('\\', '/') 636 637 def repository_name(self): 638 """Full name after removing the local path to the repository. 639 640 If we have a real absolute path name here we can try to do something smart: 641 detecting the root of the checkout and truncating /path/to/checkout from 642 the name so that we get header guards that don't include things like 643 "C:\Documents and Settings\..." or "/home/username/..." in them and thus 644 people on different computers who have checked the source out to different 645 locations won't see bogus errors. 646 """ 647 fullname = self.full_name() 648 649 if os.path.exists(fullname): 650 project_dir = os.path.dirname(fullname) 651 652 if os.path.exists(os.path.join(project_dir, ".svn")): 653 # If there's a .svn file in the current directory, we 654 # recursively look up the directory tree for the top 655 # of the SVN checkout 656 root_dir = project_dir 657 one_up_dir = os.path.dirname(root_dir) 658 while os.path.exists(os.path.join(one_up_dir, ".svn")): 659 root_dir = os.path.dirname(root_dir) 660 one_up_dir = os.path.dirname(one_up_dir) 661 662 prefix = os.path.commonprefix([root_dir, project_dir]) 663 return fullname[len(prefix) + 1:] 664 665 # Not SVN? Try to find a git top level directory by 666 # searching up from the current path. 667 root_dir = os.path.dirname(fullname) 668 while (root_dir != os.path.dirname(root_dir) 669 and not os.path.exists(os.path.join(root_dir, ".git"))): 670 root_dir = os.path.dirname(root_dir) 671 if os.path.exists(os.path.join(root_dir, ".git")): 672 prefix = os.path.commonprefix([root_dir, project_dir]) 673 return fullname[len(prefix) + 1:] 674 675 # Don't know what to do; header guard warnings may be wrong... 676 return fullname 677 678 def split(self): 679 """Splits the file into the directory, basename, and extension. 680 681 For 'chrome/browser/browser.cpp', Split() would 682 return ('chrome/browser', 'browser', '.cpp') 683 684 Returns: 685 A tuple of (directory, basename, extension). 686 """ 687 688 googlename = self.repository_name() 689 project, rest = os.path.split(googlename) 690 return (project,) + os.path.splitext(rest) 691 692 def base_name(self): 693 """File base name - text after the final slash, before the final period.""" 694 return self.split()[1] 695 696 def extension(self): 697 """File extension - text following the final period.""" 698 return self.split()[2] 699 700 def no_extension(self): 701 """File has no source file extension.""" 702 return '/'.join(self.split()[0:2]) 703 704 def is_source(self): 705 """File has a source file extension.""" 706 return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx') 707 708 709 # Matches standard C++ escape esequences per 2.13.2.3 of the C++ standard. 710 _RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( 711 r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') 712 # Matches strings. Escape codes should already be removed by ESCAPES. 713 _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"') 714 # Matches characters. Escape codes should already be removed by ESCAPES. 715 _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'") 716 # Matches multi-line C++ comments. 717 # This RE is a little bit more complicated than one might expect, because we 718 # have to take care of space removals tools so we can handle comments inside 719 # statements better. 720 # The current rule is: We only clear spaces from both sides when we're at the 721 # end of the line. Otherwise, we try to remove spaces from the right side, 722 # if this doesn't work we try on left side but only if there's a non-character 723 # on the right. 724 _RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( 725 r"""(\s*/\*.*\*/\s*$| 726 /\*.*\*/\s+| 727 \s+/\*.*\*/(?=\W)| 728 /\*.*\*/)""", re.VERBOSE) 729 730 731 def is_cpp_string(line): 732 """Does line terminate so, that the next symbol is in string constant. 733 734 This function does not consider single-line nor multi-line comments. 735 736 Args: 737 line: is a partial line of code starting from the 0..n. 738 739 Returns: 740 True, if next character appended to 'line' is inside a 741 string constant. 742 """ 743 744 line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" 745 return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 746 747 748 def find_next_multi_line_comment_start(lines, line_index): 749 """Find the beginning marker for a multiline comment.""" 750 while line_index < len(lines): 751 if lines[line_index].strip().startswith('/*'): 752 # Only return this marker if the comment goes beyond this line 753 if lines[line_index].strip().find('*/', 2) < 0: 754 return line_index 755 line_index += 1 756 return len(lines) 757 758 759 def find_next_multi_line_comment_end(lines, line_index): 760 """We are inside a comment, find the end marker.""" 761 while line_index < len(lines): 762 if lines[line_index].strip().endswith('*/'): 763 return line_index 764 line_index += 1 765 return len(lines) 766 767 768 def remove_multi_line_comments_from_range(lines, begin, end): 769 """Clears a range of lines for multi-line comments.""" 770 # Having // dummy comments makes the lines non-empty, so we will not get 771 # unnecessary blank line warnings later in the code. 772 for i in range(begin, end): 773 lines[i] = '// dummy' 774 775 776 def remove_multi_line_comments(lines, error): 777 """Removes multiline (c-style) comments from lines.""" 778 line_index = 0 779 while line_index < len(lines): 780 line_index_begin = find_next_multi_line_comment_start(lines, line_index) 781 if line_index_begin >= len(lines): 782 return 783 line_index_end = find_next_multi_line_comment_end(lines, line_index_begin) 784 if line_index_end >= len(lines): 785 error(line_index_begin + 1, 'readability/multiline_comment', 5, 786 'Could not find end of multi-line comment') 787 return 788 remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1) 789 line_index = line_index_end + 1 790 791 792 def cleanse_comments(line): 793 """Removes //-comments and single-line C-style /* */ comments. 794 795 Args: 796 line: A line of C++ source. 797 798 Returns: 799 The line with single-line comments removed. 800 """ 801 comment_position = line.find('//') 802 if comment_position != -1 and not is_cpp_string(line[:comment_position]): 803 line = line[:comment_position] 804 # get rid of /* ... */ 805 return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) 806 807 808 class CleansedLines(object): 809 """Holds 3 copies of all lines with different preprocessing applied to them. 810 811 1) elided member contains lines without strings and comments, 812 2) lines member contains lines without comments, and 813 3) raw member contains all the lines without processing. 814 All these three members are of <type 'list'>, and of the same length. 815 """ 816 817 def __init__(self, lines): 818 self.elided = [] 819 self.lines = [] 820 self.raw_lines = lines 821 self._num_lines = len(lines) 822 for line_number in range(len(lines)): 823 self.lines.append(cleanse_comments(lines[line_number])) 824 elided = self.collapse_strings(lines[line_number]) 825 self.elided.append(cleanse_comments(elided)) 826 827 def num_lines(self): 828 """Returns the number of lines represented.""" 829 return self._num_lines 830 831 @staticmethod 832 def collapse_strings(elided): 833 """Collapses strings and chars on a line to simple "" or '' blocks. 834 835 We nix strings first so we're not fooled by text like '"http://"' 836 837 Args: 838 elided: The line being processed. 839 840 Returns: 841 The line with collapsed strings. 842 """ 843 if not _RE_PATTERN_INCLUDE.match(elided): 844 # Remove escaped characters first to make quote/single quote collapsing 845 # basic. Things that look like escaped characters shouldn't occur 846 # outside of strings and chars. 847 elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) 848 elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided) 849 elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided) 850 return elided 851 852 853 def close_expression(elided, position): 854 """If input points to ( or { or [, finds the position that closes it. 855 856 If elided[position.row][position.column] points to a '(' or '{' or '[', 857 finds the line_number/pos that correspond to the closing of the expression. 858 859 Args: 860 elided: A CleansedLines.elided instance containing the file. 861 position: The position of the opening item. 862 863 Returns: 864 The Position *past* the closing brace, or Position(len(elided), -1) 865 if we never find a close. Note we ignore strings and comments when matching. 866 """ 867 line = elided[position.row] 868 start_character = line[position.column] 869 if start_character == '(': 870 enclosing_character_regex = r'[\(\)]' 871 elif start_character == '[': 872 enclosing_character_regex = r'[\[\]]' 873 elif start_character == '{': 874 enclosing_character_regex = r'[\{\}]' 875 else: 876 return Position(len(elided), -1) 877 878 current_column = position.column + 1 879 line_number = position.row 880 net_open = 1 881 for line in elided[position.row:]: 882 line = line[current_column:] 883 884 # Search the current line for opening and closing characters. 885 while True: 886 next_enclosing_character = search(enclosing_character_regex, line) 887 # No more on this line. 888 if not next_enclosing_character: 889 break 890 current_column += next_enclosing_character.end(0) 891 line = line[next_enclosing_character.end(0):] 892 if next_enclosing_character.group(0) == start_character: 893 net_open += 1 894 else: 895 net_open -= 1 896 if not net_open: 897 return Position(line_number, current_column) 898 899 # Proceed to the next line. 900 line_number += 1 901 current_column = 0 902 903 # The given item was not closed. 904 return Position(len(elided), -1) 905 906 def check_for_copyright(lines, error): 907 """Logs an error if no Copyright message appears at the top of the file.""" 908 909 # We'll say it should occur by line 10. Don't forget there's a 910 # dummy line at the front. 911 for line in xrange(1, min(len(lines), 11)): 912 if re.search(r'Copyright', lines[line], re.I): 913 break 914 else: # means no copyright line was found 915 error(0, 'legal/copyright', 5, 916 'No copyright message found. ' 917 'You should have a line: "Copyright [year] <Copyright Owner>"') 918 919 920 def get_header_guard_cpp_variable(filename): 921 """Returns the CPP variable that should be used as a header guard. 922 923 Args: 924 filename: The name of a C++ header file. 925 926 Returns: 927 The CPP variable that should be used as a header guard in the 928 named file. 929 930 """ 931 932 # Restores original filename in case that style checker is invoked from Emacs's 933 # flymake. 934 filename = re.sub(r'_flymake\.h$', '.h', filename) 935 936 standard_name = sub(r'[-.\s]', '_', os.path.basename(filename)) 937 938 # Files under WTF typically have header guards that start with WTF_. 939 if '/wtf/' in filename: 940 special_name = "WTF_" + standard_name 941 else: 942 special_name = standard_name 943 return (special_name, standard_name) 944 945 946 def check_for_header_guard(filename, lines, error): 947 """Checks that the file contains a header guard. 948 949 Logs an error if no #ifndef header guard is present. For other 950 headers, checks that the full pathname is used. 951 952 Args: 953 filename: The name of the C++ header file. 954 lines: An array of strings, each representing a line of the file. 955 error: The function to call with any errors found. 956 """ 957 958 cppvar = get_header_guard_cpp_variable(filename) 959 960 ifndef = None 961 ifndef_line_number = 0 962 define = None 963 for line_number, line in enumerate(lines): 964 line_split = line.split() 965 if len(line_split) >= 2: 966 # find the first occurrence of #ifndef and #define, save arg 967 if not ifndef and line_split[0] == '#ifndef': 968 # set ifndef to the header guard presented on the #ifndef line. 969 ifndef = line_split[1] 970 ifndef_line_number = line_number 971 if not define and line_split[0] == '#define': 972 define = line_split[1] 973 if define and ifndef: 974 break 975 976 if not ifndef or not define or ifndef != define: 977 error(0, 'build/header_guard', 5, 978 'No #ifndef header guard found, suggested CPP variable is: %s' % 979 cppvar[0]) 980 return 981 982 # The guard should be File_h. 983 if ifndef not in cppvar: 984 error(ifndef_line_number, 'build/header_guard', 5, 985 '#ifndef header guard has wrong style, please use: %s' % cppvar[0]) 986 987 988 def check_for_unicode_replacement_characters(lines, error): 989 """Logs an error for each line containing Unicode replacement characters. 990 991 These indicate that either the file contained invalid UTF-8 (likely) 992 or Unicode replacement characters (which it shouldn't). Note that 993 it's possible for this to throw off line numbering if the invalid 994 UTF-8 occurred adjacent to a newline. 995 996 Args: 997 lines: An array of strings, each representing a line of the file. 998 error: The function to call with any errors found. 999 """ 1000 for line_number, line in enumerate(lines): 1001 if u'\ufffd' in line: 1002 error(line_number, 'readability/utf8', 5, 1003 'Line contains invalid UTF-8 (or Unicode replacement character).') 1004 1005 1006 def check_for_new_line_at_eof(lines, error): 1007 """Logs an error if there is no newline char at the end of the file. 1008 1009 Args: 1010 lines: An array of strings, each representing a line of the file. 1011 error: The function to call with any errors found. 1012 """ 1013 1014 # The array lines() was created by adding two newlines to the 1015 # original file (go figure), then splitting on \n. 1016 # To verify that the file ends in \n, we just have to make sure the 1017 # last-but-two element of lines() exists and is empty. 1018 if len(lines) < 3 or lines[-2]: 1019 error(len(lines) - 2, 'whitespace/ending_newline', 5, 1020 'Could not find a newline character at the end of the file.') 1021 1022 1023 def check_for_multiline_comments_and_strings(clean_lines, line_number, error): 1024 """Logs an error if we see /* ... */ or "..." that extend past one line. 1025 1026 /* ... */ comments are legit inside macros, for one line. 1027 Otherwise, we prefer // comments, so it's ok to warn about the 1028 other. Likewise, it's ok for strings to extend across multiple 1029 lines, as long as a line continuation character (backslash) 1030 terminates each line. Although not currently prohibited by the C++ 1031 style guide, it's ugly and unnecessary. We don't do well with either 1032 in this lint program, so we warn about both. 1033 1034 Args: 1035 clean_lines: A CleansedLines instance containing the file. 1036 line_number: The number of the line to check. 1037 error: The function to call with any errors found. 1038 """ 1039 line = clean_lines.elided[line_number] 1040 1041 # Remove all \\ (escaped backslashes) from the line. They are OK, and the 1042 # second (escaped) slash may trigger later \" detection erroneously. 1043 line = line.replace('\\\\', '') 1044 1045 if line.count('/*') > line.count('*/'): 1046 error(line_number, 'readability/multiline_comment', 5, 1047 'Complex multi-line /*...*/-style comment found. ' 1048 'Lint may give bogus warnings. ' 1049 'Consider replacing these with //-style comments, ' 1050 'with #if 0...#endif, ' 1051 'or with more clearly structured multi-line comments.') 1052 1053 if (line.count('"') - line.count('\\"')) % 2: 1054 error(line_number, 'readability/multiline_string', 5, 1055 'Multi-line string ("...") found. This lint script doesn\'t ' 1056 'do well with such strings, and may give bogus warnings. They\'re ' 1057 'ugly and unnecessary, and you should use concatenation instead".') 1058 1059 1060 _THREADING_LIST = ( 1061 ('asctime(', 'asctime_r('), 1062 ('ctime(', 'ctime_r('), 1063 ('getgrgid(', 'getgrgid_r('), 1064 ('getgrnam(', 'getgrnam_r('), 1065 ('getlogin(', 'getlogin_r('), 1066 ('getpwnam(', 'getpwnam_r('), 1067 ('getpwuid(', 'getpwuid_r('), 1068 ('gmtime(', 'gmtime_r('), 1069 ('localtime(', 'localtime_r('), 1070 ('rand(', 'rand_r('), 1071 ('readdir(', 'readdir_r('), 1072 ('strtok(', 'strtok_r('), 1073 ('ttyname(', 'ttyname_r('), 1074 ) 1075 1076 1077 def check_posix_threading(clean_lines, line_number, error): 1078 """Checks for calls to thread-unsafe functions. 1079 1080 Much code has been originally written without consideration of 1081 multi-threading. Also, engineers are relying on their old experience; 1082 they have learned posix before threading extensions were added. These 1083 tests guide the engineers to use thread-safe functions (when using 1084 posix directly). 1085 1086 Args: 1087 clean_lines: A CleansedLines instance containing the file. 1088 line_number: The number of the line to check. 1089 error: The function to call with any errors found. 1090 """ 1091 line = clean_lines.elided[line_number] 1092 for single_thread_function, multithread_safe_function in _THREADING_LIST: 1093 index = line.find(single_thread_function) 1094 # Comparisons made explicit for clarity 1095 if index >= 0 and (index == 0 or (not line[index - 1].isalnum() 1096 and line[index - 1] not in ('_', '.', '>'))): 1097 error(line_number, 'runtime/threadsafe_fn', 2, 1098 'Consider using ' + multithread_safe_function + 1099 '...) instead of ' + single_thread_function + 1100 '...) for improved thread safety.') 1101 1102 1103 # Matches invalid increment: *count++, which moves pointer instead of 1104 # incrementing a value. 1105 _RE_PATTERN_INVALID_INCREMENT = re.compile( 1106 r'^\s*\*\w+(\+\+|--);') 1107 1108 1109 def check_invalid_increment(clean_lines, line_number, error): 1110 """Checks for invalid increment *count++. 1111 1112 For example following function: 1113 void increment_counter(int* count) { 1114 *count++; 1115 } 1116 is invalid, because it effectively does count++, moving pointer, and should 1117 be replaced with ++*count, (*count)++ or *count += 1. 1118 1119 Args: 1120 clean_lines: A CleansedLines instance containing the file. 1121 line_number: The number of the line to check. 1122 error: The function to call with any errors found. 1123 """ 1124 line = clean_lines.elided[line_number] 1125 if _RE_PATTERN_INVALID_INCREMENT.match(line): 1126 error(line_number, 'runtime/invalid_increment', 5, 1127 'Changing pointer instead of value (or unused value of operator*).') 1128 1129 1130 class _ClassInfo(object): 1131 """Stores information about a class.""" 1132 1133 def __init__(self, name, line_number): 1134 self.name = name 1135 self.line_number = line_number 1136 self.seen_open_brace = False 1137 self.is_derived = False 1138 self.virtual_method_line_number = None 1139 self.has_virtual_destructor = False 1140 self.brace_depth = 0 1141 self.unsigned_bitfields = [] 1142 self.bool_bitfields = [] 1143 1144 1145 class _ClassState(object): 1146 """Holds the current state of the parse relating to class declarations. 1147 1148 It maintains a stack of _ClassInfos representing the parser's guess 1149 as to the current nesting of class declarations. The innermost class 1150 is at the top (back) of the stack. Typically, the stack will either 1151 be empty or have exactly one entry. 1152 """ 1153 1154 def __init__(self): 1155 self.classinfo_stack = [] 1156 1157 def check_finished(self, error): 1158 """Checks that all classes have been completely parsed. 1159 1160 Call this when all lines in a file have been processed. 1161 Args: 1162 error: The function to call with any errors found. 1163 """ 1164 if self.classinfo_stack: 1165 # Note: This test can result in false positives if #ifdef constructs 1166 # get in the way of brace matching. See the testBuildClass test in 1167 # cpp_style_unittest.py for an example of this. 1168 error(self.classinfo_stack[0].line_number, 'build/class', 5, 1169 'Failed to find complete declaration of class %s' % 1170 self.classinfo_stack[0].name) 1171 1172 1173 class _FileState(object): 1174 def __init__(self, clean_lines, file_extension): 1175 self._did_inside_namespace_indent_warning = False 1176 self._clean_lines = clean_lines 1177 if file_extension in ['m', 'mm']: 1178 self._is_objective_c = True 1179 self._is_c = False 1180 elif file_extension == 'h': 1181 # In the case of header files, it is unknown if the file 1182 # is c / objective c or not, so set this value to None and then 1183 # if it is requested, use heuristics to guess the value. 1184 self._is_objective_c = None 1185 self._is_c = None 1186 elif file_extension == 'c': 1187 self._is_c = True 1188 self._is_objective_c = False 1189 else: 1190 self._is_objective_c = False 1191 self._is_c = False 1192 1193 def set_did_inside_namespace_indent_warning(self): 1194 self._did_inside_namespace_indent_warning = True 1195 1196 def did_inside_namespace_indent_warning(self): 1197 return self._did_inside_namespace_indent_warning 1198 1199 def is_objective_c(self): 1200 if self._is_objective_c is None: 1201 for line in self._clean_lines.elided: 1202 # Starting with @ or #import seem like the best indications 1203 # that we have an Objective C file. 1204 if line.startswith("@") or line.startswith("#import"): 1205 self._is_objective_c = True 1206 break 1207 else: 1208 self._is_objective_c = False 1209 return self._is_objective_c 1210 1211 def is_c(self): 1212 if self._is_c is None: 1213 for line in self._clean_lines.lines: 1214 # if extern "C" is found, then it is a good indication 1215 # that we have a C header file. 1216 if line.startswith('extern "C"'): 1217 self._is_c = True 1218 break 1219 else: 1220 self._is_c = False 1221 return self._is_c 1222 1223 def is_c_or_objective_c(self): 1224 """Return whether the file extension corresponds to C or Objective-C.""" 1225 return self.is_c() or self.is_objective_c() 1226 1227 1228 class _EnumState(object): 1229 """Maintains whether currently in an enum declaration, and checks whether 1230 enum declarations follow the style guide. 1231 """ 1232 1233 def __init__(self): 1234 self.in_enum_decl = False 1235 self.is_webidl_enum = False 1236 1237 def process_clean_line(self, line): 1238 # FIXME: The regular expressions for expr_all_uppercase and expr_enum_end only accept integers 1239 # and identifiers for the value of the enumerator, but do not accept any other constant 1240 # expressions. However, this is sufficient for now (11/27/2012). 1241 expr_all_uppercase = r'\s*[A-Z0-9_]+\s*(?:=\s*[a-zA-Z0-9]+\s*)?,?\s*$' 1242 expr_starts_lowercase = r'\s*[a-z]' 1243 expr_enum_end = r'}\s*(?:[a-zA-Z0-9]+\s*(?:=\s*[a-zA-Z0-9]+)?)?\s*;\s*' 1244 expr_enum_start = r'\s*enum(?:\s+[a-zA-Z0-9]+)?\s*\{?\s*' 1245 if self.in_enum_decl: 1246 if match(r'\s*' + expr_enum_end + r'$', line): 1247 self.in_enum_decl = False 1248 self.is_webidl_enum = False 1249 elif match(expr_all_uppercase, line): 1250 return self.is_webidl_enum 1251 elif match(expr_starts_lowercase, line): 1252 return False 1253 else: 1254 matched = match(expr_enum_start + r'$', line) 1255 if matched: 1256 self.in_enum_decl = True 1257 else: 1258 matched = match(expr_enum_start + r'(?P<members>.*)' + expr_enum_end + r'$', line) 1259 if matched: 1260 members = matched.group('members').split(',') 1261 found_invalid_member = False 1262 for member in members: 1263 if match(expr_all_uppercase, member): 1264 found_invalid_member = not self.is_webidl_enum 1265 if match(expr_starts_lowercase, member): 1266 found_invalid_member = True 1267 if found_invalid_member: 1268 self.is_webidl_enum = False 1269 return False 1270 return True 1271 return True 1272 1273 def check_for_non_standard_constructs(clean_lines, line_number, 1274 class_state, error): 1275 """Logs an error if we see certain non-ANSI constructs ignored by gcc-2. 1276 1277 Complain about several constructs which gcc-2 accepts, but which are 1278 not standard C++. Warning about these in lint is one way to ease the 1279 transition to new compilers. 1280 - put storage class first (e.g. "static const" instead of "const static"). 1281 - "%lld" instead of %qd" in printf-type functions. 1282 - "%1$d" is non-standard in printf-type functions. 1283 - "\%" is an undefined character escape sequence. 1284 - text after #endif is not allowed. 1285 - invalid inner-style forward declaration. 1286 - >? and <? operators, and their >?= and <?= cousins. 1287 - classes with virtual methods need virtual destructors (compiler warning 1288 available, but not turned on yet.) 1289 1290 Additionally, check for constructor/destructor style violations as it 1291 is very convenient to do so while checking for gcc-2 compliance. 1292 1293 Args: 1294 clean_lines: A CleansedLines instance containing the file. 1295 line_number: The number of the line to check. 1296 class_state: A _ClassState instance which maintains information about 1297 the current stack of nested class declarations being parsed. 1298 error: A callable to which errors are reported, which takes parameters: 1299 line number, error level, and message 1300 """ 1301 1302 # Remove comments from the line, but leave in strings for now. 1303 line = clean_lines.lines[line_number] 1304 1305 if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): 1306 error(line_number, 'runtime/printf_format', 3, 1307 '%q in format strings is deprecated. Use %ll instead.') 1308 1309 if search(r'printf\s*\(.*".*%\d+\$', line): 1310 error(line_number, 'runtime/printf_format', 2, 1311 '%N$ formats are unconventional. Try rewriting to avoid them.') 1312 1313 # Remove escaped backslashes before looking for undefined escapes. 1314 line = line.replace('\\\\', '') 1315 1316 if search(r'("|\').*\\(%|\[|\(|{)', line): 1317 error(line_number, 'build/printf_format', 3, 1318 '%, [, (, and { are undefined character escapes. Unescape them.') 1319 1320 # For the rest, work with both comments and strings removed. 1321 line = clean_lines.elided[line_number] 1322 1323 if search(r'\b(const|volatile|void|char|short|int|long' 1324 r'|float|double|signed|unsigned' 1325 r'|schar|u?int8|u?int16|u?int32|u?int64)' 1326 r'\s+(auto|register|static|extern|typedef)\b', 1327 line): 1328 error(line_number, 'build/storage_class', 5, 1329 'Storage class (static, extern, typedef, etc) should be first.') 1330 1331 if match(r'\s*#\s*endif\s*[^/\s]+', line): 1332 error(line_number, 'build/endif_comment', 5, 1333 'Uncommented text after #endif is non-standard. Use a comment.') 1334 1335 if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line): 1336 error(line_number, 'build/forward_decl', 5, 1337 'Inner-style forward declarations are invalid. Remove this line.') 1338 1339 if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line): 1340 error(line_number, 'build/deprecated', 3, 1341 '>? and <? (max and min) operators are non-standard and deprecated.') 1342 1343 # Track class entry and exit, and attempt to find cases within the 1344 # class declaration that don't meet the C++ style 1345 # guidelines. Tracking is very dependent on the code matching Google 1346 # style guidelines, but it seems to perform well enough in testing 1347 # to be a worthwhile addition to the checks. 1348 classinfo_stack = class_state.classinfo_stack 1349 # Look for a class declaration 1350 class_decl_match = match( 1351 r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line) 1352 if class_decl_match: 1353 classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number)) 1354 1355 # Everything else in this function uses the top of the stack if it's 1356 # not empty. 1357 if not classinfo_stack: 1358 return 1359 1360 classinfo = classinfo_stack[-1] 1361 1362 # If the opening brace hasn't been seen look for it and also 1363 # parent class declarations. 1364 if not classinfo.seen_open_brace: 1365 # If the line has a ';' in it, assume it's a forward declaration or 1366 # a single-line class declaration, which we won't process. 1367 if line.find(';') != -1: 1368 classinfo_stack.pop() 1369 return 1370 classinfo.seen_open_brace = (line.find('{') != -1) 1371 # Look for a bare ':' 1372 if search('(^|[^:]):($|[^:])', line): 1373 classinfo.is_derived = True 1374 if not classinfo.seen_open_brace: 1375 return # Everything else in this function is for after open brace 1376 1377 # The class may have been declared with namespace or classname qualifiers. 1378 # The constructor and destructor will not have those qualifiers. 1379 base_classname = classinfo.name.split('::')[-1] 1380 1381 # Look for single-argument constructors that aren't marked explicit. 1382 # Technically a valid construct, but against style. 1383 args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)' 1384 % re.escape(base_classname), 1385 line) 1386 if (args 1387 and args.group(1) != 'void' 1388 and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname), 1389 args.group(1).strip())): 1390 error(line_number, 'runtime/explicit', 5, 1391 'Single-argument constructors should be marked explicit.') 1392 1393 # Look for methods declared virtual. 1394 if search(r'\bvirtual\b', line): 1395 classinfo.virtual_method_line_number = line_number 1396 # Only look for a destructor declaration on the same line. It would 1397 # be extremely unlikely for the destructor declaration to occupy 1398 # more than one line. 1399 if search(r'~%s\s*\(' % base_classname, line): 1400 classinfo.has_virtual_destructor = True 1401 1402 # Look for class end. 1403 brace_depth = classinfo.brace_depth 1404 brace_depth = brace_depth + line.count('{') - line.count('}') 1405 if brace_depth <= 0: 1406 classinfo = classinfo_stack.pop() 1407 # Try to detect missing virtual destructor declarations. 1408 # For now, only warn if a non-derived class with virtual methods lacks 1409 # a virtual destructor. This is to make it less likely that people will 1410 # declare derived virtual destructors without declaring the base 1411 # destructor virtual. 1412 if ((classinfo.virtual_method_line_number is not None) 1413 and (not classinfo.has_virtual_destructor) 1414 and (not classinfo.is_derived)): # Only warn for base classes 1415 error(classinfo.line_number, 'runtime/virtual', 4, 1416 'The class %s probably needs a virtual destructor due to ' 1417 'having virtual method(s), one declared at line %d.' 1418 % (classinfo.name, classinfo.virtual_method_line_number)) 1419 # Look for mixed bool and unsigned bitfields. 1420 if (classinfo.bool_bitfields and classinfo.unsigned_bitfields): 1421 bool_list = ', '.join(classinfo.bool_bitfields) 1422 unsigned_list = ', '.join(classinfo.unsigned_bitfields) 1423 error(classinfo.line_number, 'runtime/bitfields', 5, 1424 'The class %s contains mixed unsigned and bool bitfields, ' 1425 'which will pack into separate words on the MSVC compiler.\n' 1426 'Bool bitfields are [%s].\nUnsigned bitfields are [%s].\n' 1427 'Consider converting bool bitfields to unsigned.' 1428 % (classinfo.name, bool_list, unsigned_list)) 1429 else: 1430 classinfo.brace_depth = brace_depth 1431 1432 well_typed_bitfield = False; 1433 # Look for bool <name> : 1 declarations. 1434 args = search(r'\bbool\s+(\S*)\s*:\s*\d+\s*;', line) 1435 if args: 1436 classinfo.bool_bitfields.append('%d: %s' % (line_number, args.group(1))) 1437 well_typed_bitfield = True; 1438 1439 # Look for unsigned <name> : n declarations. 1440 args = search(r'\bunsigned\s+(?:int\s+)?(\S+)\s*:\s*\d+\s*;', line) 1441 if args: 1442 classinfo.unsigned_bitfields.append('%d: %s' % (line_number, args.group(1))) 1443 well_typed_bitfield = True; 1444 1445 # Look for other bitfield declarations. We don't care about those in 1446 # size-matching structs. 1447 if not (well_typed_bitfield or classinfo.name.startswith('SameSizeAs') or 1448 classinfo.name.startswith('Expected')): 1449 args = match(r'\s*(\S+)\s+(\S+)\s*:\s*\d+\s*;', line) 1450 if args: 1451 error(line_number, 'runtime/bitfields', 4, 1452 'Member %s of class %s defined as a bitfield of type %s. ' 1453 'Please declare all bitfields as unsigned.' 1454 % (args.group(2), classinfo.name, args.group(1))) 1455 1456 def check_spacing_for_function_call(line, line_number, error): 1457 """Checks for the correctness of various spacing around function calls. 1458 1459 Args: 1460 line: The text of the line to check. 1461 line_number: The number of the line to check. 1462 error: The function to call with any errors found. 1463 """ 1464 1465 # Since function calls often occur inside if/for/foreach/while/switch 1466 # expressions - which have their own, more liberal conventions - we 1467 # first see if we should be looking inside such an expression for a 1468 # function call, to which we can apply more strict standards. 1469 function_call = line # if there's no control flow construct, look at whole line 1470 for pattern in (r'\bif\s*\((.*)\)\s*{', 1471 r'\bfor\s*\((.*)\)\s*{', 1472 r'\bforeach\s*\((.*)\)\s*{', 1473 r'\bwhile\s*\((.*)\)\s*[{;]', 1474 r'\bswitch\s*\((.*)\)\s*{'): 1475 matched = search(pattern, line) 1476 if matched: 1477 function_call = matched.group(1) # look inside the parens for function calls 1478 break 1479 1480 # Except in if/for/foreach/while/switch, there should never be space 1481 # immediately inside parens (eg "f( 3, 4 )"). We make an exception 1482 # for nested parens ( (a+b) + c ). Likewise, there should never be 1483 # a space before a ( when it's a function argument. I assume it's a 1484 # function argument when the char before the whitespace is legal in 1485 # a function name (alnum + _) and we're not starting a macro. Also ignore 1486 # pointers and references to arrays and functions coz they're too tricky: 1487 # we use a very simple way to recognize these: 1488 # " (something)(maybe-something)" or 1489 # " (something)(maybe-something," or 1490 # " (something)[something]" 1491 # Note that we assume the contents of [] to be short enough that 1492 # they'll never need to wrap. 1493 if ( # Ignore control structures. 1494 not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call) 1495 # Ignore pointers/references to functions. 1496 and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call) 1497 # Ignore pointers/references to arrays. 1498 and not search(r' \([^)]+\)\[[^\]]+\]', function_call)): 1499 if search(r'\w\s*\([ \t](?!\s*\\$)', function_call): # a ( used for a fn call 1500 error(line_number, 'whitespace/parens', 4, 1501 'Extra space after ( in function call') 1502 elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call): 1503 error(line_number, 'whitespace/parens', 2, 1504 'Extra space after (') 1505 if (search(r'\w\s+\(', function_call) 1506 and not match(r'\s*(#|typedef)', function_call)): 1507 error(line_number, 'whitespace/parens', 4, 1508 'Extra space before ( in function call') 1509 # If the ) is followed only by a newline or a { + newline, assume it's 1510 # part of a control statement (if/while/etc), and don't complain 1511 if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call): 1512 error(line_number, 'whitespace/parens', 2, 1513 'Extra space before )') 1514 1515 1516 def is_blank_line(line): 1517 """Returns true if the given line is blank. 1518 1519 We consider a line to be blank if the line is empty or consists of 1520 only white spaces. 1521 1522 Args: 1523 line: A line of a string. 1524 1525 Returns: 1526 True, if the given line is blank. 1527 """ 1528 return not line or line.isspace() 1529 1530 1531 def detect_functions(clean_lines, line_number, function_state, error): 1532 """Finds where functions start and end. 1533 1534 Uses a simplistic algorithm assuming other style guidelines 1535 (especially spacing) are followed. 1536 Trivial bodies are unchecked, so constructors with huge initializer lists 1537 may be missed. 1538 1539 Args: 1540 clean_lines: A CleansedLines instance containing the file. 1541 line_number: The number of the line to check. 1542 function_state: Current function name and lines in body so far. 1543 error: The function to call with any errors found. 1544 """ 1545 # Are we now past the end of a function? 1546 if function_state.end_position.row + 1 == line_number: 1547 function_state.end() 1548 1549 # If we're in a function, don't try to detect a new one. 1550 if function_state.in_a_function: 1551 return 1552 1553 lines = clean_lines.lines 1554 line = lines[line_number] 1555 raw = clean_lines.raw_lines 1556 raw_line = raw[line_number] 1557 1558 # Lines ending with a \ indicate a macro. Don't try to check them. 1559 if raw_line.endswith('\\'): 1560 return 1561 1562 regexp = r'\s*(\w(\w|::|\*|\&|\s|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(' # decls * & space::name( ... 1563 match_result = match(regexp, line) 1564 if not match_result: 1565 return 1566 1567 # If the name is all caps and underscores, figure it's a macro and 1568 # ignore it, unless it's TEST or TEST_F. 1569 function_name = match_result.group(1).split()[-1] 1570 if function_name != 'TEST' and function_name != 'TEST_F' and match(r'[A-Z_]+$', function_name): 1571 return 1572 1573 joined_line = '' 1574 for start_line_number in xrange(line_number, clean_lines.num_lines()): 1575 start_line = clean_lines.elided[start_line_number] 1576 joined_line += ' ' + start_line.lstrip() 1577 body_match = search(r'{|;', start_line) 1578 if body_match: 1579 body_start_position = Position(start_line_number, body_match.start(0)) 1580 1581 # Replace template constructs with _ so that no spaces remain in the function name, 1582 # while keeping the column numbers of other characters the same as "line". 1583 line_with_no_templates = iteratively_replace_matches_with_char(r'<[^<>]*>', '_', line) 1584 match_function = search(r'((\w|:|<|>|,|~|(operator\s*(/|-|=|!|\+)+))*)\(', line_with_no_templates) 1585 if not match_function: 1586 return # The '(' must have been inside of a template. 1587 1588 # Use the column numbers from the modified line to find the 1589 # function name in the original line. 1590 function = line[match_function.start(1):match_function.end(1)] 1591 function_name_start_position = Position(line_number, match_function.start(1)) 1592 1593 if match(r'TEST', function): # Handle TEST... macros 1594 parameter_regexp = search(r'(\(.*\))', joined_line) 1595 if parameter_regexp: # Ignore bad syntax 1596 function += parameter_regexp.group(1) 1597 else: 1598 function += '()' 1599 1600 parameter_start_position = Position(line_number, match_function.end(1)) 1601 parameter_end_position = close_expression(clean_lines.elided, parameter_start_position) 1602 if parameter_end_position.row == len(clean_lines.elided): 1603 # No end was found. 1604 return 1605 1606 if start_line[body_start_position.column] == ';': 1607 end_position = Position(body_start_position.row, body_start_position.column + 1) 1608 else: 1609 end_position = close_expression(clean_lines.elided, body_start_position) 1610 1611 # Check for nonsensical positions. (This happens in test cases which check code snippets.) 1612 if parameter_end_position > body_start_position: 1613 return 1614 1615 function_state.begin(function, function_name_start_position, body_start_position, end_position, 1616 parameter_start_position, parameter_end_position, clean_lines) 1617 return 1618 1619 # No body for the function (or evidence of a non-function) was found. 1620 error(line_number, 'readability/fn_size', 5, 1621 'Lint failed to find start of function body.') 1622 1623 1624 def check_for_function_lengths(clean_lines, line_number, function_state, error): 1625 """Reports for long function bodies. 1626 1627 For an overview why this is done, see: 1628 http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions 1629 1630 Blank/comment lines are not counted so as to avoid encouraging the removal 1631 of vertical space and commments just to get through a lint check. 1632 NOLINT *on the last line of a function* disables this check. 1633 1634 Args: 1635 clean_lines: A CleansedLines instance containing the file. 1636 line_number: The number of the line to check. 1637 function_state: Current function name and lines in body so far. 1638 error: The function to call with any errors found. 1639 """ 1640 lines = clean_lines.lines 1641 line = lines[line_number] 1642 raw = clean_lines.raw_lines 1643 raw_line = raw[line_number] 1644 1645 if function_state.end_position.row == line_number: # last line 1646 if not search(r'\bNOLINT\b', raw_line): 1647 function_state.check(error, line_number) 1648 elif not match(r'^\s*$', line): 1649 function_state.count(line_number) # Count non-blank/non-comment lines. 1650 1651 1652 def _check_parameter_name_against_text(parameter, text, error): 1653 """Checks to see if the parameter name is contained within the text. 1654 1655 Return false if the check failed (i.e. an error was produced). 1656 """ 1657 1658 # Treat 'lower with underscores' as a canonical form because it is 1659 # case insensitive while still retaining word breaks. (This ensures that 1660 # 'elate' doesn't look like it is duplicating of 'NateLate'.) 1661 canonical_parameter_name = parameter.lower_with_underscores_name() 1662 1663 # Appends "object" to all text to catch variables that did the same (but only 1664 # do this when the parameter name is more than a single character to avoid 1665 # flagging 'b' which may be an ok variable when used in an rgba function). 1666 if len(canonical_parameter_name) > 1: 1667 text = sub(r'(\w)\b', r'\1Object', text) 1668 canonical_text = _convert_to_lower_with_underscores(text) 1669 1670 # Used to detect cases like ec for ExceptionCode. 1671 acronym = _create_acronym(text).lower() 1672 if canonical_text.find(canonical_parameter_name) != -1 or acronym.find(canonical_parameter_name) != -1: 1673 error(parameter.row, 'readability/parameter_name', 5, 1674 'The parameter name "%s" adds no information, so it should be removed.' % parameter.name) 1675 return False 1676 return True 1677 1678 1679 def check_function_definition_and_pass_ptr(type_text, row, location_description, error): 1680 """Check that function definitions for use Pass*Ptr instead of *Ptr. 1681 1682 Args: 1683 type_text: A string containing the type. (For return values, it may contain more than the type.) 1684 row: The row number of the type. 1685 location_description: Used to indicate where the type is. This is either 'parameter' or 'return'. 1686 error: The function to call with any errors found. 1687 """ 1688 match_ref_or_own_ptr = '(?=\W|^)(Ref|Own)Ptr(?=\W)' 1689 exceptions = '(?:&|\*|\*\s*=\s*0)$' 1690 bad_type_usage = search(match_ref_or_own_ptr, type_text) 1691 exception_usage = search(exceptions, type_text) 1692 if not bad_type_usage or exception_usage: 1693 return 1694 type_name = bad_type_usage.group(0) 1695 error(row, 'readability/pass_ptr', 5, 1696 'The %s type should use Pass%s instead of %s.' % (location_description, type_name, type_name)) 1697 1698 1699 def check_function_definition(filename, file_extension, clean_lines, line_number, function_state, error): 1700 """Check that function definitions for style issues. 1701 1702 Specifically, check that parameter names in declarations add information. 1703 1704 Args: 1705 filename: Filename of the file that is being processed. 1706 file_extension: The current file extension, without the leading dot. 1707 clean_lines: A CleansedLines instance containing the file. 1708 line_number: The number of the line to check. 1709 function_state: Current function name and lines in body so far. 1710 error: The function to call with any errors found. 1711 """ 1712 if line_number != function_state.body_start_position.row: 1713 return 1714 1715 modifiers_and_return_type = function_state.modifiers_and_return_type() 1716 if filename.find('/chromium/') != -1 and search(r'\bWEBKIT_EXPORT\b', modifiers_and_return_type): 1717 if filename.find('/chromium/public/') == -1 and filename.find('/chromium/tests/') == -1 and filename.find('chromium/platform') == -1: 1718 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1719 'WEBKIT_EXPORT should only appear in the chromium public (or tests) directory.') 1720 elif not file_extension == "h": 1721 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1722 'WEBKIT_EXPORT should only be used in header files.') 1723 elif not function_state.is_declaration or search(r'\binline\b', modifiers_and_return_type): 1724 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1725 'WEBKIT_EXPORT should not be used on a function with a body.') 1726 elif function_state.is_pure: 1727 error(function_state.function_name_start_position.row, 'readability/webkit_export', 5, 1728 'WEBKIT_EXPORT should not be used with a pure virtual function.') 1729 1730 check_function_definition_and_pass_ptr(modifiers_and_return_type, function_state.function_name_start_position.row, 'return', error) 1731 1732 parameter_list = function_state.parameter_list() 1733 for parameter in parameter_list: 1734 check_function_definition_and_pass_ptr(parameter.type, parameter.row, 'parameter', error) 1735 1736 # Do checks specific to function declarations and parameter names. 1737 if not function_state.is_declaration or not parameter.name: 1738 continue 1739 1740 # Check the parameter name against the function name for single parameter set functions. 1741 if len(parameter_list) == 1 and match('set[A-Z]', function_state.current_function): 1742 trimmed_function_name = function_state.current_function[len('set'):] 1743 if not _check_parameter_name_against_text(parameter, trimmed_function_name, error): 1744 continue # Since an error was noted for this name, move to the next parameter. 1745 1746 # Check the parameter name against the type. 1747 if not _check_parameter_name_against_text(parameter, parameter.type, error): 1748 continue # Since an error was noted for this name, move to the next parameter. 1749 1750 1751 def check_pass_ptr_usage(clean_lines, line_number, function_state, error): 1752 """Check for proper usage of Pass*Ptr. 1753 1754 Currently this is limited to detecting declarations of Pass*Ptr 1755 variables inside of functions. 1756 1757 Args: 1758 clean_lines: A CleansedLines instance containing the file. 1759 line_number: The number of the line to check. 1760 function_state: Current function name and lines in body so far. 1761 error: The function to call with any errors found. 1762 """ 1763 if not function_state.in_a_function: 1764 return 1765 1766 lines = clean_lines.lines 1767 line = lines[line_number] 1768 if line_number > function_state.body_start_position.row: 1769 matched_pass_ptr = match(r'^\s*Pass([A-Z][A-Za-z]*)Ptr<', line) 1770 if matched_pass_ptr: 1771 type_name = 'Pass%sPtr' % matched_pass_ptr.group(1) 1772 error(line_number, 'readability/pass_ptr', 5, 1773 'Local variables should never be %s (see ' 1774 'http://webkit.org/coding/RefPtr.html).' % type_name) 1775 1776 1777 def check_for_leaky_patterns(clean_lines, line_number, function_state, error): 1778 """Check for constructs known to be leak prone. 1779 Args: 1780 clean_lines: A CleansedLines instance containing the file. 1781 line_number: The number of the line to check. 1782 function_state: Current function name and lines in body so far. 1783 error: The function to call with any errors found. 1784 """ 1785 lines = clean_lines.lines 1786 line = lines[line_number] 1787 1788 matched_get_dc = search(r'\b(?P<function_name>GetDC(Ex)?)\s*\(', line) 1789 if matched_get_dc: 1790 error(line_number, 'runtime/leaky_pattern', 5, 1791 'Use the class HWndDC instead of calling %s to avoid potential ' 1792 'memory leaks.' % matched_get_dc.group('function_name')) 1793 1794 matched_create_dc = search(r'\b(?P<function_name>Create(Compatible)?DC)\s*\(', line) 1795 matched_own_dc = search(r'\badoptPtr\b', line) 1796 if matched_create_dc and not matched_own_dc: 1797 error(line_number, 'runtime/leaky_pattern', 5, 1798 'Use adoptPtr and OwnPtr<HDC> when calling %s to avoid potential ' 1799 'memory leaks.' % matched_create_dc.group('function_name')) 1800 1801 1802 def check_spacing(file_extension, clean_lines, line_number, error): 1803 """Checks for the correctness of various spacing issues in the code. 1804 1805 Things we check for: spaces around operators, spaces after 1806 if/for/while/switch, no spaces around parens in function calls, two 1807 spaces between code and comment, don't start a block with a blank 1808 line, don't end a function with a blank line, don't have too many 1809 blank lines in a row. 1810 1811 Args: 1812 file_extension: The current file extension, without the leading dot. 1813 clean_lines: A CleansedLines instance containing the file. 1814 line_number: The number of the line to check. 1815 error: The function to call with any errors found. 1816 """ 1817 1818 raw = clean_lines.raw_lines 1819 line = raw[line_number] 1820 1821 # Before nixing comments, check if the line is blank for no good 1822 # reason. This includes the first line after a block is opened, and 1823 # blank lines at the end of a function (ie, right before a line like '}'). 1824 if is_blank_line(line): 1825 elided = clean_lines.elided 1826 previous_line = elided[line_number - 1] 1827 previous_brace = previous_line.rfind('{') 1828 # FIXME: Don't complain if line before blank line, and line after, 1829 # both start with alnums and are indented the same amount. 1830 # This ignores whitespace at the start of a namespace block 1831 # because those are not usually indented. 1832 if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1 1833 and previous_line[:previous_brace].find('namespace') == -1): 1834 # OK, we have a blank line at the start of a code block. Before we 1835 # complain, we check if it is an exception to the rule: The previous 1836 # non-empty line has the parameters of a function header that are indented 1837 # 4 spaces (because they did not fit in a 80 column line when placed on 1838 # the same line as the function name). We also check for the case where 1839 # the previous line is indented 6 spaces, which may happen when the 1840 # initializers of a constructor do not fit into a 80 column line. 1841 exception = False 1842 if match(r' {6}\w', previous_line): # Initializer list? 1843 # We are looking for the opening column of initializer list, which 1844 # should be indented 4 spaces to cause 6 space indentation afterwards. 1845 search_position = line_number - 2 1846 while (search_position >= 0 1847 and match(r' {6}\w', elided[search_position])): 1848 search_position -= 1 1849 exception = (search_position >= 0 1850 and elided[search_position][:5] == ' :') 1851 else: 1852 # Search for the function arguments or an initializer list. We use a 1853 # simple heuristic here: If the line is indented 4 spaces; and we have a 1854 # closing paren, without the opening paren, followed by an opening brace 1855 # or colon (for initializer lists) we assume that it is the last line of 1856 # a function header. If we have a colon indented 4 spaces, it is an 1857 # initializer list. 1858 exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', 1859 previous_line) 1860 or match(r' {4}:', previous_line)) 1861 1862 if not exception: 1863 error(line_number, 'whitespace/blank_line', 2, 1864 'Blank line at the start of a code block. Is this needed?') 1865 # This doesn't ignore whitespace at the end of a namespace block 1866 # because that is too hard without pairing open/close braces; 1867 # however, a special exception is made for namespace closing 1868 # brackets which have a comment containing "namespace". 1869 # 1870 # Also, ignore blank lines at the end of a block in a long if-else 1871 # chain, like this: 1872 # if (condition1) { 1873 # // Something followed by a blank line 1874 # 1875 # } else if (condition2) { 1876 # // Something else 1877 # } 1878 if line_number + 1 < clean_lines.num_lines(): 1879 next_line = raw[line_number + 1] 1880 if (next_line 1881 and match(r'\s*}', next_line) 1882 and next_line.find('namespace') == -1 1883 and next_line.find('} else ') == -1): 1884 error(line_number, 'whitespace/blank_line', 3, 1885 'Blank line at the end of a code block. Is this needed?') 1886 1887 # Next, we check for proper spacing with respect to comments. 1888 comment_position = line.find('//') 1889 if comment_position != -1: 1890 # Check if the // may be in quotes. If so, ignore it 1891 # Comparisons made explicit for clarity 1892 if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0: # not in quotes 1893 # Allow one space before end of line comment. 1894 if (not match(r'^\s*$', line[:comment_position]) 1895 and (comment_position >= 1 1896 and ((line[comment_position - 1] not in string.whitespace) 1897 or (comment_position >= 2 1898 and line[comment_position - 2] in string.whitespace)))): 1899 error(line_number, 'whitespace/comments', 5, 1900 'One space before end of line comments') 1901 # There should always be a space between the // and the comment 1902 commentend = comment_position + 2 1903 if commentend < len(line) and not line[commentend] == ' ': 1904 # but some lines are exceptions -- e.g. if they're big 1905 # comment delimiters like: 1906 # //---------------------------------------------------------- 1907 # or they begin with multiple slashes followed by a space: 1908 # //////// Header comment 1909 matched = (search(r'[=/-]{4,}\s*$', line[commentend:]) 1910 or search(r'^/+ ', line[commentend:])) 1911 if not matched: 1912 error(line_number, 'whitespace/comments', 4, 1913 'Should have a space between // and comment') 1914 1915 # There should only be one space after punctuation in a comment. 1916 if search(r'[.!?,;:]\s\s+\w', line[comment_position:]): 1917 error(line_number, 'whitespace/comments', 5, 1918 'Should have only a single space after a punctuation in a comment.') 1919 1920 line = clean_lines.elided[line_number] # get rid of comments and strings 1921 1922 # Don't try to do spacing checks for operator methods 1923 line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>|\+=|-=|\*=|/=|%=|&=|\|=|^=|<<=|>>=|/)\(', 'operator\(', line) 1924 # Don't try to do spacing checks for #include or #import statements at 1925 # minimum because it messes up checks for spacing around / 1926 if match(r'\s*#\s*(?:include|import)', line): 1927 return 1928 if search(r'[\w.]=[\w.]', line): 1929 error(line_number, 'whitespace/operators', 4, 1930 'Missing spaces around =') 1931 1932 # FIXME: It's not ok to have spaces around binary operators like . 1933 1934 # You should always have whitespace around binary operators. 1935 # Alas, we can't test < or > because they're legitimately used sans spaces 1936 # (a->b, vector<int> a). The only time we can tell is a < with no >, and 1937 # only if it's not template params list spilling into the next line. 1938 matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line) 1939 if not matched: 1940 # Note that while it seems that the '<[^<]*' term in the following 1941 # regexp could be simplified to '<.*', which would indeed match 1942 # the same class of strings, the [^<] means that searching for the 1943 # regexp takes linear rather than quadratic time. 1944 if not search(r'<[^<]*,\s*$', line): # template params spill 1945 matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line) 1946 if matched: 1947 error(line_number, 'whitespace/operators', 3, 1948 'Missing spaces around %s' % matched.group(1)) 1949 1950 # There shouldn't be space around unary operators 1951 matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) 1952 if matched: 1953 error(line_number, 'whitespace/operators', 4, 1954 'Extra space for operator %s' % matched.group(1)) 1955 1956 # A pet peeve of mine: no spaces after an if, while, switch, or for 1957 matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line) 1958 if matched: 1959 error(line_number, 'whitespace/parens', 5, 1960 'Missing space before ( in %s' % matched.group(1)) 1961 1962 # For if/for/foreach/while/switch, the left and right parens should be 1963 # consistent about how many spaces are inside the parens, and 1964 # there should either be zero or one spaces inside the parens. 1965 # We don't want: "if ( foo)" or "if ( foo )". 1966 # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. 1967 matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line) 1968 if matched: 1969 statement = matched.group('statement') 1970 condition, rest = up_to_unmatched_closing_paren(matched.group('remainder')) 1971 if condition is not None: 1972 condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition) 1973 if condition_match: 1974 n_leading = len(condition_match.group('leading')) 1975 n_trailing = len(condition_match.group('trailing')) 1976 if n_leading != 0: 1977 for_exception = statement == 'for' and condition.startswith(' ;') 1978 if not for_exception: 1979 error(line_number, 'whitespace/parens', 5, 1980 'Extra space after ( in %s' % statement) 1981 if n_trailing != 0: 1982 for_exception = statement == 'for' and condition.endswith('; ') 1983 if not for_exception: 1984 error(line_number, 'whitespace/parens', 5, 1985 'Extra space before ) in %s' % statement) 1986 1987 # Do not check for more than one command in macros 1988 in_preprocessor_directive = match(r'\s*#', line) 1989 if not in_preprocessor_directive and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest): 1990 error(line_number, 'whitespace/parens', 4, 1991 'More than one command on the same line in %s' % statement) 1992 1993 # You should always have a space after a comma (either as fn arg or operator) 1994 if search(r',[^\s]', line): 1995 error(line_number, 'whitespace/comma', 3, 1996 'Missing space after ,') 1997 1998 matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line) 1999 if matched: 2000 error(line_number, 'whitespace/declaration', 3, 2001 'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2'))) 2002 2003 if file_extension == 'cpp': 2004 # C++ should have the & or * beside the type not the variable name. 2005 matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line) 2006 if matched: 2007 error(line_number, 'whitespace/declaration', 3, 2008 'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip())) 2009 2010 elif file_extension == 'c': 2011 # C Pointer declaration should have the * beside the variable not the type name. 2012 matched = search(r'^\s*\w+\*\s+\w+', line) 2013 if matched: 2014 error(line_number, 'whitespace/declaration', 3, 2015 'Declaration has space between * and variable name in %s' % matched.group(0).strip()) 2016 2017 # Next we will look for issues with function calls. 2018 check_spacing_for_function_call(line, line_number, error) 2019 2020 # Except after an opening paren, you should have spaces before your braces. 2021 # And since you should never have braces at the beginning of a line, this is 2022 # an easy test. 2023 if search(r'[^ ({]{', line): 2024 error(line_number, 'whitespace/braces', 5, 2025 'Missing space before {') 2026 2027 # Make sure '} else {' has spaces. 2028 if search(r'}else', line): 2029 error(line_number, 'whitespace/braces', 5, 2030 'Missing space before else') 2031 2032 # You shouldn't have spaces before your brackets, except maybe after 2033 # 'delete []' or 'new char * []'. 2034 if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line): 2035 error(line_number, 'whitespace/braces', 5, 2036 'Extra space before [') 2037 2038 # There should always be a single space in between braces on the same line. 2039 if search(r'\{\}', line): 2040 error(line_number, 'whitespace/braces', 5, 'Missing space inside { }.') 2041 if search(r'\{\s\s+\}', line): 2042 error(line_number, 'whitespace/braces', 5, 'Too many spaces inside { }.') 2043 2044 # You shouldn't have a space before a semicolon at the end of the line. 2045 # There's a special case for "for" since the style guide allows space before 2046 # the semicolon there. 2047 if search(r':\s*;\s*$', line): 2048 error(line_number, 'whitespace/semicolon', 5, 2049 'Semicolon defining empty statement. Use { } instead.') 2050 elif search(r'^\s*;\s*$', line): 2051 error(line_number, 'whitespace/semicolon', 5, 2052 'Line contains only semicolon. If this should be an empty statement, ' 2053 'use { } instead.') 2054 elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)): 2055 error(line_number, 'whitespace/semicolon', 5, 2056 'Extra space before last semicolon. If this should be an empty ' 2057 'statement, use { } instead.') 2058 elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line) 2059 and line.count('(') == line.count(')') 2060 # Allow do {} while(); 2061 and not search(r'}\s*while', line)): 2062 error(line_number, 'whitespace/semicolon', 5, 2063 'Semicolon defining empty statement for this loop. Use { } instead.') 2064 2065 2066 def get_previous_non_blank_line(clean_lines, line_number): 2067 """Return the most recent non-blank line and its line number. 2068 2069 Args: 2070 clean_lines: A CleansedLines instance containing the file contents. 2071 line_number: The number of the line to check. 2072 2073 Returns: 2074 A tuple with two elements. The first element is the contents of the last 2075 non-blank line before the current line, or the empty string if this is the 2076 first non-blank line. The second is the line number of that line, or -1 2077 if this is the first non-blank line. 2078 """ 2079 2080 previous_line_number = line_number - 1 2081 while previous_line_number >= 0: 2082 previous_line = clean_lines.elided[previous_line_number] 2083 if not is_blank_line(previous_line): # if not a blank line... 2084 return (previous_line, previous_line_number) 2085 previous_line_number -= 1 2086 return ('', -1) 2087 2088 2089 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error): 2090 """Looks for indentation errors inside of namespaces. 2091 2092 Args: 2093 clean_lines: A CleansedLines instance containing the file. 2094 line_number: The number of the line to check. 2095 file_extension: The extension (dot not included) of the file. 2096 file_state: A _FileState instance which maintains information about 2097 the state of things in the file. 2098 error: The function to call with any errors found. 2099 """ 2100 2101 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2102 2103 namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line) 2104 if not namespace_match: 2105 return 2106 2107 current_indentation_level = len(namespace_match.group('namespace_indentation')) 2108 if current_indentation_level > 0: 2109 # Don't warn about an indented namespace if we already warned about indented code. 2110 if not file_state.did_inside_namespace_indent_warning(): 2111 error(line_number, 'whitespace/indent', 4, 2112 'namespace should never be indented.') 2113 return 2114 looking_for_semicolon = False; 2115 line_offset = 0 2116 in_preprocessor_directive = False; 2117 for current_line in clean_lines.elided[line_number + 1:]: 2118 line_offset += 1 2119 if not current_line.strip(): 2120 continue 2121 if not current_indentation_level: 2122 if not (in_preprocessor_directive or looking_for_semicolon): 2123 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning(): 2124 file_state.set_did_inside_namespace_indent_warning() 2125 error(line_number + line_offset, 'whitespace/indent', 4, 2126 'Code inside a namespace should not be indented.') 2127 if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax. 2128 in_preprocessor_directive = current_line[-1] == '\\' 2129 else: 2130 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\') 2131 else: 2132 looking_for_semicolon = False; # If we have a brace we may not need a semicolon. 2133 current_indentation_level += current_line.count('{') - current_line.count('}') 2134 if current_indentation_level < 0: 2135 break; 2136 2137 2138 def check_enum_casing(clean_lines, line_number, enum_state, error): 2139 """Looks for incorrectly named enum values. 2140 2141 Args: 2142 clean_lines: A CleansedLines instance containing the file. 2143 line_number: The number of the line to check. 2144 enum_state: A _EnumState instance which maintains enum declaration state. 2145 error: The function to call with any errors found. 2146 """ 2147 2148 enum_state.is_webidl_enum |= bool(match(r'\s*// Web(?:Kit)?IDL enum\s*$', clean_lines.raw_lines[line_number])) 2149 2150 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2151 if not enum_state.process_clean_line(line): 2152 error(line_number, 'readability/enum_casing', 4, 2153 'enum members should use InterCaps with an initial capital letter.') 2154 2155 def check_directive_indentation(clean_lines, line_number, file_state, error): 2156 """Looks for indentation of preprocessor directives. 2157 2158 Args: 2159 clean_lines: A CleansedLines instance containing the file. 2160 line_number: The number of the line to check. 2161 file_state: A _FileState instance which maintains information about 2162 the state of things in the file. 2163 error: The function to call with any errors found. 2164 """ 2165 2166 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2167 2168 indented_preprocessor_directives = match(r'\s+#', line) 2169 if not indented_preprocessor_directives: 2170 return 2171 2172 error(line_number, 'whitespace/indent', 4, 'preprocessor directives (e.g., #ifdef, #define, #import) should never be indented.') 2173 2174 2175 def get_initial_spaces_for_line(clean_line): 2176 initial_spaces = 0 2177 while initial_spaces < len(clean_line) and clean_line[initial_spaces] == ' ': 2178 initial_spaces += 1 2179 return initial_spaces 2180 2181 2182 def check_indentation_amount(clean_lines, line_number, error): 2183 line = clean_lines.elided[line_number] 2184 initial_spaces = get_initial_spaces_for_line(line) 2185 2186 if initial_spaces % 4: 2187 error(line_number, 'whitespace/indent', 3, 2188 'Weird number of spaces at line-start. Are you using a 4-space indent?') 2189 return 2190 2191 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 2192 if not previous_line.strip() or match(r'\s*\w+\s*:\s*$', previous_line) or previous_line[0] == '#': 2193 return 2194 2195 previous_line_initial_spaces = get_initial_spaces_for_line(previous_line) 2196 if initial_spaces > previous_line_initial_spaces + 4: 2197 error(line_number, 'whitespace/indent', 3, 'When wrapping a line, only indent 4 spaces.') 2198 2199 2200 def check_using_std(clean_lines, line_number, file_state, error): 2201 """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'. 2202 2203 Args: 2204 clean_lines: A CleansedLines instance containing the file. 2205 line_number: The number of the line to check. 2206 file_state: A _FileState instance which maintains information about 2207 the state of things in the file. 2208 error: The function to call with any errors found. 2209 """ 2210 2211 # This check doesn't apply to C or Objective-C implementation files. 2212 if file_state.is_c_or_objective_c(): 2213 return 2214 2215 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2216 2217 using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line) 2218 if not using_std_match: 2219 return 2220 2221 method_name = using_std_match.group('method_name') 2222 error(line_number, 'build/using_std', 4, 2223 "Use 'using namespace std;' instead of 'using std::%s;'." % method_name) 2224 2225 2226 def check_max_min_macros(clean_lines, line_number, file_state, error): 2227 """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min(). 2228 2229 Args: 2230 clean_lines: A CleansedLines instance containing the file. 2231 line_number: The number of the line to check. 2232 file_state: A _FileState instance which maintains information about 2233 the state of things in the file. 2234 error: The function to call with any errors found. 2235 """ 2236 2237 # This check doesn't apply to C or Objective-C implementation files. 2238 if file_state.is_c_or_objective_c(): 2239 return 2240 2241 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2242 2243 max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line) 2244 if not max_min_macros_search: 2245 return 2246 2247 max_min_macro = max_min_macros_search.group('max_min_macro') 2248 max_min_macro_lower = max_min_macro.lower() 2249 error(line_number, 'runtime/max_min_macros', 4, 2250 'Use std::%s() or std::%s<type>() instead of the %s() macro.' 2251 % (max_min_macro_lower, max_min_macro_lower, max_min_macro)) 2252 2253 2254 def check_ctype_functions(clean_lines, line_number, file_state, error): 2255 """Looks for use of the standard functions in ctype.h and suggest they be replaced 2256 by use of equivilent ones in <wtf/ASCIICType.h>?. 2257 2258 Args: 2259 clean_lines: A CleansedLines instance containing the file. 2260 line_number: The number of the line to check. 2261 file_state: A _FileState instance which maintains information about 2262 the state of things in the file. 2263 error: The function to call with any errors found. 2264 """ 2265 2266 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2267 2268 ctype_function_search = search(r'\b(?P<ctype_function>(isalnum|isalpha|isascii|isblank|iscntrl|isdigit|isgraph|islower|isprint|ispunct|isspace|isupper|isxdigit|toascii|tolower|toupper))\s*\(', line) 2269 if not ctype_function_search: 2270 return 2271 2272 ctype_function = ctype_function_search.group('ctype_function') 2273 error(line_number, 'runtime/ctype_function', 4, 2274 'Use equivelent function in <wtf/ASCIICType.h> instead of the %s() function.' 2275 % (ctype_function)) 2276 2277 def check_switch_indentation(clean_lines, line_number, error): 2278 """Looks for indentation errors inside of switch statements. 2279 2280 Args: 2281 clean_lines: A CleansedLines instance containing the file. 2282 line_number: The number of the line to check. 2283 error: The function to call with any errors found. 2284 """ 2285 2286 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2287 2288 switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line) 2289 if not switch_match: 2290 return 2291 2292 switch_indentation = switch_match.group('switch_indentation') 2293 inner_indentation = switch_indentation + ' ' * 4 2294 line_offset = 0 2295 encountered_nested_switch = False 2296 2297 for current_line in clean_lines.elided[line_number + 1:]: 2298 line_offset += 1 2299 2300 # Skip not only empty lines but also those with preprocessor directives. 2301 if current_line.strip() == '' or current_line.startswith('#'): 2302 continue 2303 2304 if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line): 2305 # Complexity alarm - another switch statement nested inside the one 2306 # that we're currently testing. We'll need to track the extent of 2307 # that inner switch if the upcoming label tests are still supposed 2308 # to work correctly. Let's not do that; instead, we'll finish 2309 # checking this line, and then leave it like that. Assuming the 2310 # indentation is done consistently (even if incorrectly), this will 2311 # still catch all indentation issues in practice. 2312 encountered_nested_switch = True 2313 2314 current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line); 2315 current_indentation = current_indentation_match.group('indentation') 2316 remaining_line = current_indentation_match.group('remaining_line') 2317 2318 # End the check at the end of the switch statement. 2319 if remaining_line.startswith('}') and current_indentation == switch_indentation: 2320 break 2321 # Case and default branches should not be indented. The regexp also 2322 # catches single-line cases like "default: break;" but does not trigger 2323 # on stuff like "Document::Foo();". 2324 elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line): 2325 if current_indentation != switch_indentation: 2326 error(line_number + line_offset, 'whitespace/indent', 4, 2327 'A case label should not be indented, but line up with its switch statement.') 2328 # Don't throw an error for multiple badly indented labels, 2329 # one should be enough to figure out the problem. 2330 break 2331 # We ignore goto labels at the very beginning of a line. 2332 elif match(r'\w+\s*:\s*$', remaining_line): 2333 continue 2334 # It's not a goto label, so check if it's indented at least as far as 2335 # the switch statement plus one more level of indentation. 2336 elif not current_indentation.startswith(inner_indentation): 2337 error(line_number + line_offset, 'whitespace/indent', 4, 2338 'Non-label code inside switch statements should be indented.') 2339 # Don't throw an error for multiple badly indented statements, 2340 # one should be enough to figure out the problem. 2341 break 2342 2343 if encountered_nested_switch: 2344 break 2345 2346 2347 def check_braces(clean_lines, line_number, error): 2348 """Looks for misplaced braces (e.g. at the end of line). 2349 2350 Args: 2351 clean_lines: A CleansedLines instance containing the file. 2352 line_number: The number of the line to check. 2353 error: The function to call with any errors found. 2354 """ 2355 2356 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2357 2358 if match(r'\s*{\s*$', line): 2359 # We allow an open brace to start a line in the case where someone 2360 # is using braces for function definition or in a block to 2361 # explicitly create a new scope, which is commonly used to control 2362 # the lifetime of stack-allocated variables. We don't detect this 2363 # perfectly: we just don't complain if the last non-whitespace 2364 # character on the previous non-blank line is ';', ':', '{', '}', 2365 # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'. 2366 # We also allow '#' for #endif and '=' for array initialization. 2367 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 2368 if ((not search(r'[;:}{)=]\s*$|\)\s*((const|OVERRIDE)\s*)*\s*$', previous_line) 2369 or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line)) 2370 and previous_line.find('#') < 0): 2371 error(line_number, 'whitespace/braces', 4, 2372 'This { should be at the end of the previous line') 2373 elif (search(r'\)\s*(((const|OVERRIDE)\s*)*\s*)?{\s*$', line) 2374 and line.count('(') == line.count(')') 2375 and not search(r'\b(if|for|foreach|while|switch)\b', line) 2376 and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)): 2377 error(line_number, 'whitespace/braces', 4, 2378 'Place brace on its own line for function definitions.') 2379 2380 # An else clause should be on the same line as the preceding closing brace. 2381 if match(r'\s*else\s*', line): 2382 previous_line = get_previous_non_blank_line(clean_lines, line_number)[0] 2383 if match(r'\s*}\s*$', previous_line): 2384 error(line_number, 'whitespace/newline', 4, 2385 'An else should appear on the same line as the preceding }') 2386 2387 # Likewise, an else should never have the else clause on the same line 2388 if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line): 2389 error(line_number, 'whitespace/newline', 4, 2390 'Else clause should never be on same line as else (use 2 lines)') 2391 2392 # In the same way, a do/while should never be on one line 2393 if match(r'\s*do [^\s{]', line): 2394 error(line_number, 'whitespace/newline', 4, 2395 'do/while clauses should not be on a single line') 2396 2397 # Braces shouldn't be followed by a ; unless they're defining a struct 2398 # or initializing an array. 2399 # We can't tell in general, but we can for some common cases. 2400 previous_line_number = line_number 2401 while True: 2402 (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number) 2403 if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'): 2404 line = previous_line + line 2405 else: 2406 break 2407 if (search(r'{.*}\s*;', line) 2408 and line.count('{') == line.count('}') 2409 and not search(r'struct|class|enum|\s*=\s*{', line)): 2410 error(line_number, 'readability/braces', 4, 2411 "You don't need a ; after a }") 2412 2413 2414 def check_exit_statement_simplifications(clean_lines, line_number, error): 2415 """Looks for else or else-if statements that should be written as an 2416 if statement when the prior if concludes with a return, break, continue or 2417 goto statement. 2418 2419 Args: 2420 clean_lines: A CleansedLines instance containing the file. 2421 line_number: The number of the line to check. 2422 error: The function to call with any errors found. 2423 """ 2424 2425 line = clean_lines.elided[line_number] # Get rid of comments and strings. 2426 2427 else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line) 2428 if not else_match: 2429 return 2430 2431 else_indentation = else_match.group('else_indentation') 2432 inner_indentation = else_indentation + ' ' * 4 2433 2434 previous_lines = clean_lines.elided[:line_number] 2435 previous_lines.reverse() 2436 line_offset = 0 2437 encountered_exit_statement = False 2438 2439 for current_line in previous_lines: 2440 line_offset -= 1 2441 2442 # Skip not only empty lines but also those with preprocessor directives 2443 # and goto labels. 2444 if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line): 2445 continue 2446 2447 # Skip lines with closing braces on the original indentation level. 2448 # Even though the styleguide says they should be on the same line as 2449 # the "else if" statement, we also want to check for instances where 2450 # the current code does not comply with the coding style. Thus, ignore 2451 # these lines and proceed to the line before that. 2452 if current_line == else_indentation + '}': 2453 continue 2454 2455 current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line); 2456 current_indentation = current_indentation_match.group('indentation') 2457 remaining_line = current_indentation_match.group('remaining_line') 2458 2459 # As we're going up the lines, the first real statement to encounter 2460 # has to be an exit statement (return, break, continue or goto) - 2461 # otherwise, this check doesn't apply. 2462 if not encountered_exit_statement: 2463 # We only want to find exit statements if they are on exactly 2464 # the same level of indentation as expected from the code inside 2465 # the block. If the indentation doesn't strictly match then we 2466 # might have a nested if or something, which must be ignored. 2467 if current_indentation != inner_indentation: 2468 break 2469 if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line): 2470 encountered_exit_statement = True 2471 continue 2472 break 2473 2474 # When code execution reaches this point, we've found an exit statement 2475 # as last statement of the previous block. Now we only need to make 2476 # sure that the block belongs to an "if", then we can throw an error. 2477 2478 # Skip lines with opening braces on the original indentation level, 2479 # similar to the closing braces check above. ("if (condition)\n{") 2480 if current_line == else_indentation + '{': 2481 continue 2482 2483 # Skip everything that's further indented than our "else" or "else if". 2484 if current_indentation.startswith(else_indentation) and current_indentation != else_indentation: 2485 continue 2486 2487 # So we've got a line with same (or less) indentation. Is it an "if"? 2488 # If yes: throw an error. If no: don't throw an error. 2489 # Whatever the outcome, this is the end of our loop. 2490 if match(r'if\s*\(', remaining_line): 2491 if else_match.start('else') != -1: 2492 error(line_number + line_offset, 'readability/control_flow', 4, 2493 'An else statement can be removed when the prior "if" ' 2494 'concludes with a return, break, continue or goto statement.') 2495 else: 2496 error(line_number + line_offset, 'readability/control_flow', 4, 2497 'An else if statement should be written as an if statement ' 2498 'when the prior "if" concludes with a return, break, ' 2499 'continue or goto statement.') 2500 break 2501 2502 2503 def replaceable_check(operator, macro, line): 2504 """Determine whether a basic CHECK can be replaced with a more specific one. 2505 2506 For example suggest using CHECK_EQ instead of CHECK(a == b) and 2507 similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE. 2508 2509 Args: 2510 operator: The C++ operator used in the CHECK. 2511 macro: The CHECK or EXPECT macro being called. 2512 line: The current source line. 2513 2514 Returns: 2515 True if the CHECK can be replaced with a more specific one. 2516 """ 2517 2518 # This matches decimal and hex integers, strings, and chars (in that order). 2519 match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')' 2520 2521 # Expression to match two sides of the operator with something that 2522 # looks like a literal, since CHECK(x == iterator) won't compile. 2523 # This means we can't catch all the cases where a more specific 2524 # CHECK is possible, but it's less annoying than dealing with 2525 # extraneous warnings. 2526 match_this = (r'\s*' + macro + r'\((\s*' + 2527 match_constant + r'\s*' + operator + r'[^<>].*|' 2528 r'.*[^<>]' + operator + r'\s*' + match_constant + 2529 r'\s*\))') 2530 2531 # Don't complain about CHECK(x == NULL) or similar because 2532 # CHECK_EQ(x, NULL) won't compile (requires a cast). 2533 # Also, don't complain about more complex boolean expressions 2534 # involving && or || such as CHECK(a == b || c == d). 2535 return match(match_this, line) and not search(r'NULL|&&|\|\|', line) 2536 2537 2538 def check_check(clean_lines, line_number, error): 2539 """Checks the use of CHECK and EXPECT macros. 2540 2541 Args: 2542 clean_lines: A CleansedLines instance containing the file. 2543 line_number: The number of the line to check. 2544 error: The function to call with any errors found. 2545 """ 2546 2547 # Decide the set of replacement macros that should be suggested 2548 raw_lines = clean_lines.raw_lines 2549 current_macro = '' 2550 for macro in _CHECK_MACROS: 2551 if raw_lines[line_number].find(macro) >= 0: 2552 current_macro = macro 2553 break 2554 if not current_macro: 2555 # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT' 2556 return 2557 2558 line = clean_lines.elided[line_number] # get rid of comments and strings 2559 2560 # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc. 2561 for operator in ['==', '!=', '>=', '>', '<=', '<']: 2562 if replaceable_check(operator, current_macro, line): 2563 error(line_number, 'readability/check', 2, 2564 'Consider using %s instead of %s(a %s b)' % ( 2565 _CHECK_REPLACEMENT[current_macro][operator], 2566 current_macro, operator)) 2567 break 2568 2569 2570 def check_for_comparisons_to_zero(clean_lines, line_number, error): 2571 # Get the line without comments and strings. 2572 line = clean_lines.elided[line_number] 2573 2574 # Include NULL here so that users don't have to convert NULL to 0 first and then get this error. 2575 if search(r'[=!]=\s*(NULL|0|true|false)[^\w.]', line) or search(r'[^\w.](NULL|0|true|false)\s*[=!]=', line): 2576 if not search('LIKELY', line) and not search('UNLIKELY', line): 2577 error(line_number, 'readability/comparison_to_zero', 5, 2578 'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.') 2579 2580 2581 def check_for_null(clean_lines, line_number, file_state, error): 2582 # This check doesn't apply to C or Objective-C implementation files. 2583 if file_state.is_c_or_objective_c(): 2584 return 2585 2586 line = clean_lines.elided[line_number] 2587 2588 # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372. 2589 if search(r'\bg(_[a-z]+)+\b', line): 2590 return 2591 2592 # Don't warn about NULL usage in gst_*(). See Bug 70498. 2593 if search(r'\bgst(_[a-z]+)+\b', line): 2594 return 2595 2596 # Don't warn about NULL usage in gdk_pixbuf_save_to_*{join,concat}(). See Bug 43090. 2597 if search(r'\bgdk_pixbuf_save_to\w+\b', line): 2598 return 2599 2600 # Don't warn about NULL usage in gtk_widget_style_get(), gtk_style_context_get_style(), or gtk_style_context_get(). See Bug 51758 2601 if search(r'\bgtk_widget_style_get\(\w+\b', line) or search(r'\bgtk_style_context_get_style\(\w+\b', line) or search(r'\bgtk_style_context_get\(\w+\b', line): 2602 return 2603 2604 # Don't warn about NULL usage in soup_server_new(). See Bug 77890. 2605 if search(r'\bsoup_server_new\(\w+\b', line): 2606 return 2607 2608 if search(r'\bNULL\b', line): 2609 error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.') 2610 return 2611 2612 line = clean_lines.raw_lines[line_number] 2613 # See if NULL occurs in any comments in the line. If the search for NULL using the raw line 2614 # matches, then do the check with strings collapsed to avoid giving errors for 2615 # NULLs occurring in strings. 2616 if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)): 2617 error(line_number, 'readability/null', 4, 'Use 0 or null instead of NULL (even in *comments*).') 2618 2619 def get_line_width(line): 2620 """Determines the width of the line in column positions. 2621 2622 Args: 2623 line: A string, which may be a Unicode string. 2624 2625 Returns: 2626 The width of the line in column positions, accounting for Unicode 2627 combining characters and wide characters. 2628 """ 2629 if isinstance(line, unicode): 2630 width = 0 2631 for c in unicodedata.normalize('NFC', line): 2632 if unicodedata.east_asian_width(c) in ('W', 'F'): 2633 width += 2 2634 elif not unicodedata.combining(c): 2635 width += 1 2636 return width 2637 return len(line) 2638 2639 2640 def check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line_number, error): 2641 """Scans the bodies of conditionals and loops, and in particular 2642 all the arms of conditionals, for violations in the use of braces. 2643 2644 Specifically: 2645 2646 (1) If an arm omits braces, then the following statement must be on one 2647 physical line. 2648 (2) If any arm uses braces, all arms must use them. 2649 2650 These checks are only done here if we find the start of an 2651 'if/for/foreach/while' statement, because this function fails fast 2652 if it encounters constructs it doesn't understand. Checks 2653 elsewhere validate other constraints, such as requiring '}' and 2654 'else' to be on the same line. 2655 2656 Args: 2657 clean_lines: A CleansedLines instance containing the file. 2658 line_number: The number of the line to check. 2659 error: The function to call with any errors found. 2660 """ 2661 2662 # We work with the elided lines. Comments have been removed, but line 2663 # numbers are preserved, so we can still find situations where 2664 # single-expression control clauses span multiple lines, or when a 2665 # comment preceded the expression. 2666 lines = clean_lines.elided 2667 line = lines[line_number] 2668 2669 # Match control structures. 2670 control_match = match(r'\s*(if|foreach|for|while)\s*\(', line) 2671 if not control_match: 2672 return 2673 2674 # Found the start of a conditional or loop. 2675 2676 # The following loop handles all potential arms of the control clause. 2677 # The initial conditions are the following: 2678 # - We start on the opening paren '(' of the condition, *unless* we are 2679 # handling an 'else' block, in which case there is no condition. 2680 # - In the latter case, we start at the position just beyond the 'else' 2681 # token. 2682 expect_conditional_expression = True 2683 know_whether_using_braces = False 2684 using_braces = False 2685 search_for_else_clause = control_match.group(1) == "if" 2686 current_pos = Position(line_number, control_match.end() - 1) 2687 2688 while True: 2689 if expect_conditional_expression: 2690 # Try to find the end of the conditional expression, 2691 # potentially spanning multiple lines. 2692 open_paren_pos = current_pos 2693 close_paren_pos = close_expression(lines, open_paren_pos) 2694 if close_paren_pos.column < 0: 2695 return 2696 current_pos = close_paren_pos 2697 2698 end_line_of_conditional = current_pos.row 2699 2700 # Find the start of the body. 2701 current_pos = _find_in_lines(r'\S', lines, current_pos, None) 2702 if not current_pos: 2703 return 2704 2705 current_arm_uses_brace = False 2706 if lines[current_pos.row][current_pos.column] == '{': 2707 current_arm_uses_brace = True 2708 if know_whether_using_braces: 2709 if using_braces != current_arm_uses_brace: 2710 error(current_pos.row, 'whitespace/braces', 4, 2711 'If one part of an if-else statement uses curly braces, the other part must too.') 2712 return 2713 know_whether_using_braces = True 2714 using_braces = current_arm_uses_brace 2715 2716 if using_braces: 2717 # Skip over the entire arm. 2718 current_pos = close_expression(lines, current_pos) 2719 if current_pos.column < 0: 2720 return 2721 else: 2722 # Skip over the current expression. 2723 current_line_number = current_pos.row 2724 current_pos = _find_in_lines(r';', lines, current_pos, None) 2725 if not current_pos: 2726 return 2727 # If the end of the expression is beyond the line just after 2728 # the close parenthesis or control clause, we've found a 2729 # single-expression arm that spans multiple lines. (We don't 2730 # fire this error for expressions ending on the same line; that 2731 # is a different error, handled elsewhere.) 2732 if current_pos.row > 1 + end_line_of_conditional: 2733 error(current_pos.row, 'whitespace/braces', 4, 2734 'A conditional or loop body must use braces if the statement is more than one line long.') 2735 return 2736 current_pos = Position(current_pos.row, 1 + current_pos.column) 2737 2738 # At this point current_pos points just past the end of the last 2739 # arm. If we just handled the last control clause, we're done. 2740 if not search_for_else_clause: 2741 return 2742 2743 # Scan forward for the next non-whitespace character, and see 2744 # whether we are continuing a conditional (with an 'else' or 2745 # 'else if'), or are done. 2746 current_pos = _find_in_lines(r'\S', lines, current_pos, None) 2747 if not current_pos: 2748 return 2749 next_nonspace_string = lines[current_pos.row][current_pos.column:] 2750 next_conditional = match(r'(else\s*if|else)', next_nonspace_string) 2751 if not next_conditional: 2752 # Done processing this 'if' and all arms. 2753 return 2754 if next_conditional.group(1) == "else if": 2755 current_pos = _find_in_lines(r'\(', lines, current_pos, None) 2756 else: 2757 current_pos.column += 4 # skip 'else' 2758 expect_conditional_expression = False 2759 search_for_else_clause = False 2760 # End while loop 2761 2762 def check_style(clean_lines, line_number, file_extension, class_state, file_state, enum_state, error): 2763 """Checks rules from the 'C++ style rules' section of cppguide.html. 2764 2765 Most of these rules are hard to test (naming, comment style), but we 2766 do what we can. In particular we check for 4-space indents, line lengths, 2767 tab usage, spaces inside code, etc. 2768 2769 Args: 2770 clean_lines: A CleansedLines instance containing the file. 2771 line_number: The number of the line to check. 2772 file_extension: The extension (without the dot) of the filename. 2773 class_state: A _ClassState instance which maintains information about 2774 the current stack of nested class declarations being parsed. 2775 file_state: A _FileState instance which maintains information about 2776 the state of things in the file. 2777 enum_state: A _EnumState instance which maintains the current enum state. 2778 error: The function to call with any errors found. 2779 """ 2780 2781 raw_lines = clean_lines.raw_lines 2782 line = raw_lines[line_number] 2783 2784 if line.find('\t') != -1: 2785 error(line_number, 'whitespace/tab', 1, 2786 'Tab found; better to use spaces') 2787 2788 cleansed_line = clean_lines.elided[line_number] 2789 if line and line[-1].isspace(): 2790 error(line_number, 'whitespace/end_of_line', 4, 2791 'Line ends in whitespace. Consider deleting these extra spaces.') 2792 2793 if (cleansed_line.count(';') > 1 2794 # for loops are allowed two ;'s (and may run over two lines). 2795 and cleansed_line.find('for') == -1 2796 and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1 2797 or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1) 2798 # It's ok to have many commands in a switch case that fits in 1 line 2799 and not ((cleansed_line.find('case ') != -1 2800 or cleansed_line.find('default:') != -1) 2801 and cleansed_line.find('break;') != -1) 2802 # Also it's ok to have many commands in trivial single-line accessors in class definitions. 2803 and not (match(r'.*\(.*\).*{.*.}', line) 2804 and class_state.classinfo_stack 2805 and line.count('{') == line.count('}')) 2806 and not cleansed_line.startswith('#define ') 2807 # It's ok to use use WTF_MAKE_NONCOPYABLE and WTF_MAKE_FAST_ALLOCATED macros in 1 line 2808 and not (cleansed_line.find("WTF_MAKE_NONCOPYABLE") != -1 2809 and cleansed_line.find("WTF_MAKE_FAST_ALLOCATED") != -1)): 2810 error(line_number, 'whitespace/newline', 4, 2811 'More than one command on the same line') 2812 2813 if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'): 2814 error(line_number, 'whitespace/operators', 4, 2815 'Boolean expressions that span multiple lines should have their ' 2816 'operators on the left side of the line instead of the right side.') 2817 2818 # Some more style checks 2819 check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error) 2820 check_directive_indentation(clean_lines, line_number, file_state, error) 2821 check_using_std(clean_lines, line_number, file_state, error) 2822 check_max_min_macros(clean_lines, line_number, file_state, error) 2823 check_ctype_functions(clean_lines, line_number, file_state, error) 2824 check_switch_indentation(clean_lines, line_number, error) 2825 check_braces(clean_lines, line_number, error) 2826 check_exit_statement_simplifications(clean_lines, line_number, error) 2827 check_spacing(file_extension, clean_lines, line_number, error) 2828 check_check(clean_lines, line_number, error) 2829 check_for_comparisons_to_zero(clean_lines, line_number, error) 2830 check_for_null(clean_lines, line_number, file_state, error) 2831 check_indentation_amount(clean_lines, line_number, error) 2832 check_enum_casing(clean_lines, line_number, enum_state, error) 2833 2834 2835 _RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"') 2836 _RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') 2837 # Matches the first component of a filename delimited by -s and _s. That is: 2838 # _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' 2839 # _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo' 2840 # _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo' 2841 # _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo' 2842 _RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') 2843 2844 2845 def _drop_common_suffixes(filename): 2846 """Drops common suffixes like _test.cpp or -inl.h from filename. 2847 2848 For example: 2849 >>> _drop_common_suffixes('foo/foo-inl.h') 2850 'foo/foo' 2851 >>> _drop_common_suffixes('foo/bar/foo.cpp') 2852 'foo/bar/foo' 2853 >>> _drop_common_suffixes('foo/foo_internal.h') 2854 'foo/foo' 2855 >>> _drop_common_suffixes('foo/foo_unusualinternal.h') 2856 'foo/foo_unusualinternal' 2857 2858 Args: 2859 filename: The input filename. 2860 2861 Returns: 2862 The filename with the common suffix removed. 2863 """ 2864 for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp', 2865 'inl.h', 'impl.h', 'internal.h'): 2866 if (filename.endswith(suffix) and len(filename) > len(suffix) 2867 and filename[-len(suffix) - 1] in ('-', '_')): 2868 return filename[:-len(suffix) - 1] 2869 return os.path.splitext(filename)[0] 2870 2871 2872 def _classify_include(filename, include, is_system, include_state): 2873 """Figures out what kind of header 'include' is. 2874 2875 Args: 2876 filename: The current file cpp_style is running over. 2877 include: The path to a #included file. 2878 is_system: True if the #include used <> rather than "". 2879 include_state: An _IncludeState instance in which the headers are inserted. 2880 2881 Returns: 2882 One of the _XXX_HEADER constants. 2883 2884 For example: 2885 >>> _classify_include('foo.cpp', 'config.h', False) 2886 _CONFIG_HEADER 2887 >>> _classify_include('foo.cpp', 'foo.h', False) 2888 _PRIMARY_HEADER 2889 >>> _classify_include('foo.cpp', 'bar.h', False) 2890 _OTHER_HEADER 2891 """ 2892 2893 # If it is a system header we know it is classified as _OTHER_HEADER. 2894 if is_system and not include.startswith('public/'): 2895 return _OTHER_HEADER 2896 2897 # If the include is named config.h then this is WebCore/config.h. 2898 if include == "config.h": 2899 return _CONFIG_HEADER 2900 2901 # There cannot be primary includes in header files themselves. Only an 2902 # include exactly matches the header filename will be is flagged as 2903 # primary, so that it triggers the "don't include yourself" check. 2904 if filename.endswith('.h') and filename != include: 2905 return _OTHER_HEADER; 2906 2907 # Qt's moc files do not follow the naming and ordering rules, so they should be skipped 2908 if include.startswith('moc_') and include.endswith('.cpp'): 2909 return _MOC_HEADER 2910 2911 if include.endswith('.moc'): 2912 return _MOC_HEADER 2913 2914 # If the target file basename starts with the include we're checking 2915 # then we consider it the primary header. 2916 target_base = FileInfo(filename).base_name() 2917 include_base = FileInfo(include).base_name() 2918 2919 # If we haven't encountered a primary header, then be lenient in checking. 2920 if not include_state.visited_primary_section(): 2921 if target_base.find(include_base) != -1: 2922 return _PRIMARY_HEADER 2923 # Qt private APIs use _p.h suffix. 2924 if include_base.find(target_base) != -1 and include_base.endswith('_p'): 2925 return _PRIMARY_HEADER 2926 2927 # If we already encountered a primary header, perform a strict comparison. 2928 # In case the two filename bases are the same then the above lenient check 2929 # probably was a false positive. 2930 elif include_state.visited_primary_section() and target_base == include_base: 2931 if include == "ResourceHandleWin.h": 2932 # FIXME: Thus far, we've only seen one example of these, but if we 2933 # start to see more, please consider generalizing this check 2934 # somehow. 2935 return _OTHER_HEADER 2936 return _PRIMARY_HEADER 2937 2938 return _OTHER_HEADER 2939 2940 2941 def _does_primary_header_exist(filename): 2942 """Return a primary header file name for a file, or empty string 2943 if the file is not source file or primary header does not exist. 2944 """ 2945 fileinfo = FileInfo(filename) 2946 if not fileinfo.is_source(): 2947 return False 2948 primary_header = fileinfo.no_extension() + ".h" 2949 return os.path.isfile(primary_header) 2950 2951 2952 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error): 2953 """Check rules that are applicable to #include lines. 2954 2955 Strings on #include lines are NOT removed from elided line, to make 2956 certain tasks easier. However, to prevent false positives, checks 2957 applicable to #include lines in CheckLanguage must be put here. 2958 2959 Args: 2960 filename: The name of the current file. 2961 file_extension: The current file extension, without the leading dot. 2962 clean_lines: A CleansedLines instance containing the file. 2963 line_number: The number of the line to check. 2964 include_state: An _IncludeState instance in which the headers are inserted. 2965 error: The function to call with any errors found. 2966 """ 2967 # FIXME: For readability or as a possible optimization, consider 2968 # exiting early here by checking whether the "build/include" 2969 # category should be checked for the given filename. This 2970 # may involve having the error handler classes expose a 2971 # should_check() method, in addition to the usual __call__ 2972 # method. 2973 line = clean_lines.lines[line_number] 2974 2975 matched = _RE_PATTERN_INCLUDE.search(line) 2976 if not matched: 2977 return 2978 2979 include = matched.group(2) 2980 is_system = (matched.group(1) == '<') 2981 2982 # Look for any of the stream classes that are part of standard C++. 2983 if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include): 2984 error(line_number, 'readability/streams', 3, 2985 'Streams are highly discouraged.') 2986 2987 # Look for specific includes to fix. 2988 if include.startswith('wtf/') and is_system: 2989 error(line_number, 'build/include', 4, 2990 'wtf includes should be "wtf/file.h" instead of <wtf/file.h>.') 2991 2992 if filename.find('/chromium/') != -1 and include.startswith('cc/CC'): 2993 error(line_number, 'build/include', 4, 2994 'cc includes should be "CCFoo.h" instead of "cc/CCFoo.h".') 2995 2996 duplicate_header = include in include_state 2997 if duplicate_header: 2998 error(line_number, 'build/include', 4, 2999 '"%s" already included at %s:%s' % 3000 (include, filename, include_state[include])) 3001 else: 3002 include_state[include] = line_number 3003 3004 header_type = _classify_include(filename, include, is_system, include_state) 3005 primary_header_exists = _does_primary_header_exist(filename) 3006 include_state.header_types[line_number] = header_type 3007 3008 # Only proceed if this isn't a duplicate header. 3009 if duplicate_header: 3010 return 3011 3012 # We want to ensure that headers appear in the right order: 3013 # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted 3014 # 2) for header files: alphabetically sorted 3015 # The include_state object keeps track of the last type seen 3016 # and complains if the header types are out of order or missing. 3017 error_message = include_state.check_next_include_order(header_type, 3018 file_extension == "h", 3019 primary_header_exists) 3020 3021 # Check to make sure we have a blank line after primary header. 3022 if not error_message and header_type == _PRIMARY_HEADER: 3023 next_line = clean_lines.raw_lines[line_number + 1] 3024 if not is_blank_line(next_line): 3025 error(line_number, 'build/include_order', 4, 3026 'You should add a blank line after implementation file\'s own header.') 3027 3028 # Check to make sure all headers besides config.h and the primary header are 3029 # alphabetically sorted. Skip Qt's moc files. 3030 if not error_message and header_type == _OTHER_HEADER: 3031 previous_line_number = line_number - 1; 3032 previous_line = clean_lines.lines[previous_line_number] 3033 previous_match = _RE_PATTERN_INCLUDE.search(previous_line) 3034 while (not previous_match and previous_line_number > 0 3035 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)): 3036 previous_line_number -= 1; 3037 previous_line = clean_lines.lines[previous_line_number] 3038 previous_match = _RE_PATTERN_INCLUDE.search(previous_line) 3039 if previous_match: 3040 previous_header_type = include_state.header_types[previous_line_number] 3041 if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip(): 3042 # This type of error is potentially a problem with this line or the previous one, 3043 # so if the error is filtered for one line, report it for the next. This is so that 3044 # we properly handle patches, for which only modified lines produce errors. 3045 if not error(line_number - 1, 'build/include_order', 4, 'Alphabetical sorting problem.'): 3046 error(line_number, 'build/include_order', 4, 'Alphabetical sorting problem.') 3047 3048 if error_message: 3049 if file_extension == 'h': 3050 error(line_number, 'build/include_order', 4, 3051 '%s Should be: alphabetically sorted.' % 3052 error_message) 3053 else: 3054 error(line_number, 'build/include_order', 4, 3055 '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' % 3056 error_message) 3057 3058 3059 def check_language(filename, clean_lines, line_number, file_extension, include_state, 3060 file_state, error): 3061 """Checks rules from the 'C++ language rules' section of cppguide.html. 3062 3063 Some of these rules are hard to test (function overloading, using 3064 uint32 inappropriately), but we do the best we can. 3065 3066 Args: 3067 filename: The name of the current file. 3068 clean_lines: A CleansedLines instance containing the file. 3069 line_number: The number of the line to check. 3070 file_extension: The extension (without the dot) of the filename. 3071 include_state: An _IncludeState instance in which the headers are inserted. 3072 file_state: A _FileState instance which maintains information about 3073 the state of things in the file. 3074 error: The function to call with any errors found. 3075 """ 3076 # If the line is empty or consists of entirely a comment, no need to 3077 # check it. 3078 line = clean_lines.elided[line_number] 3079 if not line: 3080 return 3081 3082 matched = _RE_PATTERN_INCLUDE.search(line) 3083 if matched: 3084 check_include_line(filename, file_extension, clean_lines, line_number, include_state, error) 3085 return 3086 3087 # FIXME: figure out if they're using default arguments in fn proto. 3088 3089 # Check to see if they're using an conversion function cast. 3090 # I just try to capture the most common basic types, though there are more. 3091 # Parameterless conversion functions, such as bool(), are allowed as they are 3092 # probably a member operator declaration or default constructor. 3093 matched = search( 3094 r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line) 3095 if matched: 3096 # gMock methods are defined using some variant of MOCK_METHODx(name, type) 3097 # where type may be float(), int(string), etc. Without context they are 3098 # virtually indistinguishable from int(x) casts. 3099 if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line): 3100 error(line_number, 'readability/casting', 4, 3101 'Using deprecated casting style. ' 3102 'Use static_cast<%s>(...) instead' % 3103 matched.group(1)) 3104 3105 check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number], 3106 'static_cast', 3107 r'\((int|float|double|bool|char|u?int(16|32|64))\)', 3108 error) 3109 # This doesn't catch all cases. Consider (const char * const)"hello". 3110 check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number], 3111 'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error) 3112 3113 # In addition, we look for people taking the address of a cast. This 3114 # is dangerous -- casts can assign to temporaries, so the pointer doesn't 3115 # point where you think. 3116 if search( 3117 r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line): 3118 error(line_number, 'runtime/casting', 4, 3119 ('Are you taking an address of a cast? ' 3120 'This is dangerous: could be a temp var. ' 3121 'Take the address before doing the cast, rather than after')) 3122 3123 # Check for people declaring static/global STL strings at the top level. 3124 # This is dangerous because the C++ language does not guarantee that 3125 # globals with constructors are initialized before the first access. 3126 matched = match( 3127 r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)', 3128 line) 3129 # Make sure it's not a function. 3130 # Function template specialization looks like: "string foo<Type>(...". 3131 # Class template definitions look like: "string Foo<Type>::Method(...". 3132 if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', 3133 matched.group(3)): 3134 error(line_number, 'runtime/string', 4, 3135 'For a static/global string constant, use a C style string instead: ' 3136 '"%schar %s[]".' % 3137 (matched.group(1), matched.group(2))) 3138 3139 # Check that we're not using RTTI outside of testing code. 3140 if search(r'\bdynamic_cast<', line): 3141 error(line_number, 'runtime/rtti', 5, 3142 'Do not use dynamic_cast<>. If you need to cast within a class ' 3143 "hierarchy, use static_cast<> to upcast. Google doesn't support " 3144 'RTTI.') 3145 3146 if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line): 3147 error(line_number, 'runtime/init', 4, 3148 'You seem to be initializing a member variable with itself.') 3149 3150 if file_extension == 'h': 3151 # FIXME: check that 1-arg constructors are explicit. 3152 # How to tell it's a constructor? 3153 # (handled in check_for_non_standard_constructs for now) 3154 pass 3155 3156 # Check if people are using the verboten C basic types. The only exception 3157 # we regularly allow is "unsigned short port" for port. 3158 if search(r'\bshort port\b', line): 3159 if not search(r'\bunsigned short port\b', line): 3160 error(line_number, 'runtime/int', 4, 3161 'Use "unsigned short" for ports, not "short"') 3162 3163 # When snprintf is used, the second argument shouldn't be a literal. 3164 matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) 3165 if matched: 3166 error(line_number, 'runtime/printf', 3, 3167 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' 3168 'to snprintf.' % (matched.group(1), matched.group(2))) 3169 3170 # Check if some verboten C functions are being used. 3171 if search(r'\bsprintf\b', line): 3172 error(line_number, 'runtime/printf', 5, 3173 'Never use sprintf. Use snprintf instead.') 3174 matched = search(r'\b(strcpy|strcat)\b', line) 3175 if matched: 3176 error(line_number, 'runtime/printf', 4, 3177 'Almost always, snprintf is better than %s' % matched.group(1)) 3178 3179 if search(r'\bsscanf\b', line): 3180 error(line_number, 'runtime/printf', 1, 3181 'sscanf can be ok, but is slow and can overflow buffers.') 3182 3183 # Check for suspicious usage of "if" like 3184 # } if (a == b) { 3185 if search(r'\}\s*if\s*\(', line): 3186 error(line_number, 'readability/braces', 4, 3187 'Did you mean "else if"? If not, start a new line for "if".') 3188 3189 # Check for potential format string bugs like printf(foo). 3190 # We constrain the pattern not to pick things like DocidForPrintf(foo). 3191 # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) 3192 matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I) 3193 if matched: 3194 error(line_number, 'runtime/printf', 4, 3195 'Potential format string bug. Do %s("%%s", %s) instead.' 3196 % (matched.group(1), matched.group(2))) 3197 3198 # Check for potential memset bugs like memset(buf, sizeof(buf), 0). 3199 matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) 3200 if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)): 3201 error(line_number, 'runtime/memset', 4, 3202 'Did you mean "memset(%s, 0, %s)"?' 3203 % (matched.group(1), matched.group(2))) 3204 3205 # Detect variable-length arrays. 3206 matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) 3207 if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and 3208 matched.group(3).find(']') == -1): 3209 # Split the size using space and arithmetic operators as delimiters. 3210 # If any of the resulting tokens are not compile time constants then 3211 # report the error. 3212 tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3)) 3213 is_const = True 3214 skip_next = False 3215 for tok in tokens: 3216 if skip_next: 3217 skip_next = False 3218 continue 3219 3220 if search(r'sizeof\(.+\)', tok): 3221 continue 3222 if search(r'arraysize\(\w+\)', tok): 3223 continue 3224 3225 tok = tok.lstrip('(') 3226 tok = tok.rstrip(')') 3227 if not tok: 3228 continue 3229 if match(r'\d+', tok): 3230 continue 3231 if match(r'0[xX][0-9a-fA-F]+', tok): 3232 continue 3233 if match(r'k[A-Z0-9]\w*', tok): 3234 continue 3235 if match(r'(.+::)?k[A-Z0-9]\w*', tok): 3236 continue 3237 if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): 3238 continue 3239 # A catch all for tricky sizeof cases, including 'sizeof expression', 3240 # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' 3241 # requires skipping the next token becasue we split on ' ' and '*'. 3242 if tok.startswith('sizeof'): 3243 skip_next = True 3244 continue 3245 is_const = False 3246 break 3247 if not is_const: 3248 error(line_number, 'runtime/arrays', 1, 3249 'Do not use variable-length arrays. Use an appropriately named ' 3250 "('k' followed by CamelCase) compile-time constant for the size.") 3251 3252 # Check for use of unnamed namespaces in header files. Registration 3253 # macros are typically OK, so we allow use of "namespace {" on lines 3254 # that end with backslashes. 3255 if (file_extension == 'h' 3256 and search(r'\bnamespace\s*{', line) 3257 and line[-1] != '\\'): 3258 error(line_number, 'build/namespaces', 4, 3259 'Do not use unnamed namespaces in header files. See ' 3260 'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' 3261 ' for more information.') 3262 3263 # Check for plain bitfields declared without either "singed" or "unsigned". 3264 # Most compilers treat such bitfields as signed, but there are still compilers like 3265 # RVCT 4.0 that use unsigned by default. 3266 matched = re.match(r'\s*((const|mutable)\s+)?(char|(short(\s+int)?)|int|long(\s+(long|int))?)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*:\s*\d+\s*;', line) 3267 if matched: 3268 error(line_number, 'runtime/bitfields', 5, 3269 'Please declare integral type bitfields with either signed or unsigned.') 3270 3271 check_identifier_name_in_declaration(filename, line_number, line, file_state, error) 3272 3273 # Check for unsigned int (should be just 'unsigned') 3274 if search(r'\bunsigned int\b', line): 3275 error(line_number, 'runtime/unsigned', 1, 3276 'Omit int when using unsigned') 3277 3278 # Check for usage of static_cast<Classname*>. 3279 check_for_object_static_cast(filename, line_number, line, error) 3280 3281 3282 def check_identifier_name_in_declaration(filename, line_number, line, file_state, error): 3283 """Checks if identifier names contain any underscores. 3284 3285 As identifiers in libraries we are using have a bunch of 3286 underscores, we only warn about the declarations of identifiers 3287 and don't check use of identifiers. 3288 3289 Args: 3290 filename: The name of the current file. 3291 line_number: The number of the line to check. 3292 line: The line of code to check. 3293 file_state: A _FileState instance which maintains information about 3294 the state of things in the file. 3295 error: The function to call with any errors found. 3296 """ 3297 # We don't check return and delete statements and conversion operator declarations. 3298 if match(r'\s*(return|delete|operator)\b', line): 3299 return 3300 3301 # Basically, a declaration is a type name followed by whitespaces 3302 # followed by an identifier. The type name can be complicated 3303 # due to type adjectives and templates. We remove them first to 3304 # simplify the process to find declarations of identifiers. 3305 3306 # Convert "long long", "long double", and "long long int" to 3307 # simple types, but don't remove simple "long". 3308 line = sub(r'long (long )?(?=long|double|int)', '', line) 3309 # Convert unsigned/signed types to simple types, too. 3310 line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line) 3311 line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line) 3312 3313 # Remove "new" and "new (expr)" to simplify, too. 3314 line = sub(r'new\s*(\([^)]*\))?', '', line) 3315 3316 # Remove all template parameters by removing matching < and >. 3317 # Loop until no templates are removed to remove nested templates. 3318 while True: 3319 line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line) 3320 if not number_of_replacements: 3321 break 3322 3323 # Declarations of local variables can be in condition expressions 3324 # of control flow statements (e.g., "if (RenderObject* p = o->parent())"). 3325 # We remove the keywords and the first parenthesis. 3326 # 3327 # Declarations in "while", "if", and "switch" are different from 3328 # other declarations in two aspects: 3329 # 3330 # - There can be only one declaration between the parentheses. 3331 # (i.e., you cannot write "if (int i = 0, j = 1) {}") 3332 # - The variable must be initialized. 3333 # (i.e., you cannot write "if (int i) {}") 3334 # 3335 # and we will need different treatments for them. 3336 line = sub(r'^\s*for\s*\(', '', line) 3337 line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line) 3338 3339 # Detect variable and functions. 3340 type_regexp = r'\w([\w]|\s*[*&]\s*|::)+' 3341 identifier_regexp = r'(?P<identifier>[\w:]+)' 3342 maybe_bitfield_regexp = r'(:\s*\d+\s*)?' 3343 character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)' 3344 declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + maybe_bitfield_regexp + character_after_identifier_regexp 3345 declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp 3346 is_function_arguments = False 3347 number_of_identifiers = 0 3348 while True: 3349 # If we are seeing the first identifier or arguments of a 3350 # function, there should be a type name before an identifier. 3351 if not number_of_identifiers or is_function_arguments: 3352 declaration_regexp = declaration_with_type_regexp 3353 else: 3354 declaration_regexp = declaration_without_type_regexp 3355 3356 matched = match(declaration_regexp, line) 3357 if not matched: 3358 return 3359 identifier = matched.group('identifier') 3360 character_after_identifier = matched.group('character_after_identifier') 3361 3362 # If we removed a non-for-control statement, the character after 3363 # the identifier should be '='. With this rule, we can avoid 3364 # warning for cases like "if (val & INT_MAX) {". 3365 if control_statement and character_after_identifier != '=': 3366 return 3367 3368 is_function_arguments = is_function_arguments or character_after_identifier == '(' 3369 3370 # Remove "m_" and "s_" to allow them. 3371 modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier) 3372 if not file_state.is_objective_c() and modified_identifier.find('_') >= 0: 3373 # Various exceptions to the rule: JavaScript op codes functions, const_iterator. 3374 if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('op_') >= 0) 3375 and not (filename.find('gtk') >= 0 and modified_identifier.startswith('webkit_') >= 0) 3376 and not modified_identifier.startswith('tst_') 3377 and not modified_identifier.startswith('webkit_dom_object_') 3378 and not modified_identifier.startswith('webkit_soup') 3379 and not modified_identifier.startswith('NPN_') 3380 and not modified_identifier.startswith('NPP_') 3381 and not modified_identifier.startswith('NP_') 3382 and not modified_identifier.startswith('qt_') 3383 and not modified_identifier.startswith('_q_') 3384 and not modified_identifier.startswith('cairo_') 3385 and not modified_identifier.startswith('Ecore_') 3386 and not modified_identifier.startswith('Eina_') 3387 and not modified_identifier.startswith('Evas_') 3388 and not modified_identifier.startswith('Ewk_') 3389 and not modified_identifier.startswith('cti_') 3390 and not modified_identifier.find('::qt_') >= 0 3391 and not modified_identifier.find('::_q_') >= 0 3392 and not modified_identifier == "const_iterator" 3393 and not modified_identifier == "vm_throw" 3394 and not modified_identifier == "DFG_OPERATION"): 3395 error(line_number, 'readability/naming/underscores', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.") 3396 3397 # Check for variables named 'l', these are too easy to confuse with '1' in some fonts 3398 if modified_identifier == 'l': 3399 error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.") 3400 3401 # There can be only one declaration in non-for-control statements. 3402 if control_statement: 3403 return 3404 # We should continue checking if this is a function 3405 # declaration because we need to check its arguments. 3406 # Also, we need to check multiple declarations. 3407 if character_after_identifier != '(' and character_after_identifier != ',': 3408 return 3409 3410 number_of_identifiers += 1 3411 line = line[matched.end():] 3412 3413 3414 def check_for_toFoo_definition(filename, pattern, error): 3415 """ Reports for using static_cast instead of toFoo convenience function. 3416 3417 This function will output warnings to make sure you are actually using 3418 the added toFoo conversion functions rather than directly hard coding 3419 the static_cast<Classname*> call. For example, you should toHTMLELement(Node*) 3420 to convert Node* to HTMLElement*, instead of static_cast<HTMLElement*>(Node*) 3421 3422 Args: 3423 filename: The name of the header file in which to check for toFoo definition. 3424 pattern: The conversion function pattern to grep for. 3425 error: The function to call with any errors found. 3426 """ 3427 def get_abs_filepath(filename): 3428 fileSystem = FileSystem() 3429 base_dir = fileSystem.path_to_module(FileSystem.__module__).split('WebKit', 1)[0] 3430 base_dir = ''.join((base_dir, 'WebKit/Source')) 3431 for root, dirs, names in os.walk(base_dir): 3432 if filename in names: 3433 return os.path.join(root, filename) 3434 return None 3435 3436 def grep(lines, pattern, error): 3437 matches = [] 3438 function_state = None 3439 for line_number in xrange(lines.num_lines()): 3440 line = (lines.elided[line_number]).rstrip() 3441 try: 3442 if pattern in line: 3443 if not function_state: 3444 function_state = _FunctionState(1) 3445 detect_functions(lines, line_number, function_state, error) 3446 # Exclude the match of dummy conversion function. Dummy function is just to 3447 # catch invalid conversions and shouldn't be part of possible alternatives. 3448 result = re.search(r'%s(\s+)%s' % ("void", pattern), line) 3449 if not result: 3450 matches.append([line, function_state.body_start_position.row, function_state.end_position.row + 1]) 3451 function_state = None 3452 except UnicodeDecodeError: 3453 # There would be no non-ascii characters in the codebase ever. The only exception 3454 # would be comments/copyright text which might have non-ascii characters. Hence, 3455 # it is prefectly safe to catch the UnicodeDecodeError and just pass the line. 3456 pass 3457 3458 return matches 3459 3460 def check_in_mock_header(filename, matches=None): 3461 if not filename == 'Foo.h': 3462 return False 3463 3464 header_file = None 3465 try: 3466 header_file = CppChecker.fs.read_text_file(filename) 3467 except IOError: 3468 return False 3469 line_number = 0 3470 for line in header_file: 3471 line_number += 1 3472 matched = re.search(r'\btoFoo\b', line) 3473 if matched: 3474 matches.append(['toFoo', line_number, line_number + 3]) 3475 return True 3476 3477 # For unit testing only, avoid header search and lookup locally. 3478 matches = [] 3479 mock_def_found = check_in_mock_header(filename, matches) 3480 if mock_def_found: 3481 return matches 3482 3483 # Regular style check flow. Search for actual header file & defs. 3484 file_path = get_abs_filepath(filename) 3485 if not file_path: 3486 return None 3487 try: 3488 f = open(file_path) 3489 clean_lines = CleansedLines(f.readlines()) 3490 finally: 3491 f.close() 3492 3493 # Make a list of all genuine alternatives to static_cast. 3494 matches = grep(clean_lines, pattern, error) 3495 return matches 3496 3497 3498 def check_for_object_static_cast(processing_file, line_number, line, error): 3499 """Checks for a Cpp-style static cast on objects by looking for the pattern. 3500 3501 Args: 3502 processing_file: The name of the processing file. 3503 line_number: The number of the line to check. 3504 line: The line of code to check. 3505 error: The function to call with any errors found. 3506 """ 3507 matched = search(r'\bstatic_cast<(\s*\w*:?:?\w+\s*\*+\s*)>', line) 3508 if not matched: 3509 return 3510 3511 class_name = re.sub('[\*]', '', matched.group(1)) 3512 class_name = class_name.strip() 3513 # Ignore (for now) when the casting is to void*, 3514 if class_name == 'void': 3515 return 3516 3517 namespace_pos = class_name.find(':') 3518 if not namespace_pos == -1: 3519 class_name = class_name[namespace_pos + 2:] 3520 3521 header_file = ''.join((class_name, '.h')) 3522 matches = check_for_toFoo_definition(header_file, ''.join(('to', class_name)), error) 3523 # Ignore (for now) if not able to find the header where toFoo might be defined. 3524 # TODO: Handle cases where Classname might be defined in some other header or cpp file. 3525 if matches is None: 3526 return 3527 3528 report_error = True 3529 # Ensure found static_cast instance is not from within toFoo definition itself. 3530 if (os.path.basename(processing_file) == header_file): 3531 for item in matches: 3532 if line_number in range(item[1], item[2]): 3533 report_error = False 3534 break 3535 3536 if report_error: 3537 if len(matches): 3538 # toFoo is defined - enforce using it. 3539 # TODO: Suggest an appropriate toFoo from the alternatives present in matches. 3540 error(line_number, 'runtime/casting', 4, 3541 'static_cast of class objects is not allowed. Use to%s defined in %s.' % 3542 (class_name, header_file)) 3543 else: 3544 # No toFoo defined - enforce definition & usage. 3545 # TODO: Automate the generation of toFoo() to avoid any slippages ever. 3546 error(line_number, 'runtime/casting', 4, 3547 'static_cast of class objects is not allowed. Add to%s in %s and use it instead.' % 3548 (class_name, header_file)) 3549 3550 3551 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern, 3552 error): 3553 """Checks for a C-style cast by looking for the pattern. 3554 3555 This also handles sizeof(type) warnings, due to similarity of content. 3556 3557 Args: 3558 line_number: The number of the line to check. 3559 line: The line of code to check. 3560 raw_line: The raw line of code to check, with comments. 3561 cast_type: The string for the C++ cast to recommend. This is either 3562 reinterpret_cast or static_cast, depending. 3563 pattern: The regular expression used to find C-style casts. 3564 error: The function to call with any errors found. 3565 """ 3566 matched = search(pattern, line) 3567 if not matched: 3568 return 3569 3570 # e.g., sizeof(int) 3571 sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1]) 3572 if sizeof_match: 3573 error(line_number, 'runtime/sizeof', 1, 3574 'Using sizeof(type). Use sizeof(varname) instead if possible') 3575 return 3576 3577 remainder = line[matched.end(0):] 3578 3579 # The close paren is for function pointers as arguments to a function. 3580 # eg, void foo(void (*bar)(int)); 3581 # The semicolon check is a more basic function check; also possibly a 3582 # function pointer typedef. 3583 # eg, void foo(int); or void foo(int) const; 3584 # The equals check is for function pointer assignment. 3585 # eg, void *(*foo)(int) = ... 3586 # 3587 # Right now, this will only catch cases where there's a single argument, and 3588 # it's unnamed. It should probably be expanded to check for multiple 3589 # arguments with some unnamed. 3590 function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder) 3591 if function_match: 3592 if (not function_match.group(3) 3593 or function_match.group(3) == ';' 3594 or raw_line.find('/*') < 0): 3595 error(line_number, 'readability/function', 3, 3596 'All parameters should be named in a function') 3597 return 3598 3599 # At this point, all that should be left is actual casts. 3600 error(line_number, 'readability/casting', 4, 3601 'Using C-style cast. Use %s<%s>(...) instead' % 3602 (cast_type, matched.group(1))) 3603 3604 3605 _HEADERS_CONTAINING_TEMPLATES = ( 3606 ('<deque>', ('deque',)), 3607 ('<functional>', ('unary_function', 'binary_function', 3608 'plus', 'minus', 'multiplies', 'divides', 'modulus', 3609 'negate', 3610 'equal_to', 'not_equal_to', 'greater', 'less', 3611 'greater_equal', 'less_equal', 3612 'logical_and', 'logical_or', 'logical_not', 3613 'unary_negate', 'not1', 'binary_negate', 'not2', 3614 'bind1st', 'bind2nd', 3615 'pointer_to_unary_function', 3616 'pointer_to_binary_function', 3617 'ptr_fun', 3618 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', 3619 'mem_fun_ref_t', 3620 'const_mem_fun_t', 'const_mem_fun1_t', 3621 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', 3622 'mem_fun_ref', 3623 )), 3624 ('<limits>', ('numeric_limits',)), 3625 ('<list>', ('list',)), 3626 ('<map>', ('map', 'multimap',)), 3627 ('<memory>', ('allocator',)), 3628 ('<queue>', ('queue', 'priority_queue',)), 3629 ('<set>', ('set', 'multiset',)), 3630 ('<stack>', ('stack',)), 3631 ('<string>', ('char_traits', 'basic_string',)), 3632 ('<utility>', ('pair',)), 3633 ('<vector>', ('vector',)), 3634 3635 # gcc extensions. 3636 # Note: std::hash is their hash, ::hash is our hash 3637 ('<hash_map>', ('hash_map', 'hash_multimap',)), 3638 ('<hash_set>', ('hash_set', 'hash_multiset',)), 3639 ('<slist>', ('slist',)), 3640 ) 3641 3642 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = { 3643 # We can trust with reasonable confidence that map gives us pair<>, too. 3644 'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap') 3645 } 3646 3647 _RE_PATTERN_STRING = re.compile(r'\bstring\b') 3648 3649 _re_pattern_algorithm_header = [] 3650 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap', 3651 'transform'): 3652 # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or 3653 # type::max(). 3654 _re_pattern_algorithm_header.append( 3655 (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), 3656 _template, 3657 '<algorithm>')) 3658 3659 _re_pattern_templates = [] 3660 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: 3661 for _template in _templates: 3662 _re_pattern_templates.append( 3663 (re.compile(r'(\<|\b)' + _template + r'\s*\<'), 3664 _template + '<>', 3665 _header)) 3666 3667 3668 def files_belong_to_same_module(filename_cpp, filename_h): 3669 """Check if these two filenames belong to the same module. 3670 3671 The concept of a 'module' here is a as follows: 3672 foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the 3673 same 'module' if they are in the same directory. 3674 some/path/public/xyzzy and some/path/internal/xyzzy are also considered 3675 to belong to the same module here. 3676 3677 If the filename_cpp contains a longer path than the filename_h, for example, 3678 '/absolute/path/to/base/sysinfo.cpp', and this file would include 3679 'base/sysinfo.h', this function also produces the prefix needed to open the 3680 header. This is used by the caller of this function to more robustly open the 3681 header file. We don't have access to the real include paths in this context, 3682 so we need this guesswork here. 3683 3684 Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module 3685 according to this implementation. Because of this, this function gives 3686 some false positives. This should be sufficiently rare in practice. 3687 3688 Args: 3689 filename_cpp: is the path for the .cpp file 3690 filename_h: is the path for the header path 3691 3692 Returns: 3693 Tuple with a bool and a string: 3694 bool: True if filename_cpp and filename_h belong to the same module. 3695 string: the additional prefix needed to open the header file. 3696 """ 3697 3698 if not filename_cpp.endswith('.cpp'): 3699 return (False, '') 3700 filename_cpp = filename_cpp[:-len('.cpp')] 3701 if filename_cpp.endswith('_unittest'): 3702 filename_cpp = filename_cpp[:-len('_unittest')] 3703 elif filename_cpp.endswith('_test'): 3704 filename_cpp = filename_cpp[:-len('_test')] 3705 filename_cpp = filename_cpp.replace('/public/', '/') 3706 filename_cpp = filename_cpp.replace('/internal/', '/') 3707 3708 if not filename_h.endswith('.h'): 3709 return (False, '') 3710 filename_h = filename_h[:-len('.h')] 3711 if filename_h.endswith('-inl'): 3712 filename_h = filename_h[:-len('-inl')] 3713 filename_h = filename_h.replace('/public/', '/') 3714 filename_h = filename_h.replace('/internal/', '/') 3715 3716 files_belong_to_same_module = filename_cpp.endswith(filename_h) 3717 common_path = '' 3718 if files_belong_to_same_module: 3719 common_path = filename_cpp[:-len(filename_h)] 3720 return files_belong_to_same_module, common_path 3721 3722 3723 def update_include_state(filename, include_state): 3724 """Fill up the include_state with new includes found from the file. 3725 3726 Args: 3727 filename: the name of the header to read. 3728 include_state: an _IncludeState instance in which the headers are inserted. 3729 io: The io factory to use to read the file. Provided for testability. 3730 3731 Returns: 3732 True if a header was succesfully added. False otherwise. 3733 """ 3734 header_file = None 3735 try: 3736 header_file = CppChecker.fs.read_text_file(filename) 3737 except IOError: 3738 return False 3739 line_number = 0 3740 for line in header_file: 3741 line_number += 1 3742 clean_line = cleanse_comments(line) 3743 matched = _RE_PATTERN_INCLUDE.search(clean_line) 3744 if matched: 3745 include = matched.group(2) 3746 # The value formatting is cute, but not really used right now. 3747 # What matters here is that the key is in include_state. 3748 include_state.setdefault(include, '%s:%d' % (filename, line_number)) 3749 return True 3750 3751 3752 def check_for_include_what_you_use(filename, clean_lines, include_state, error): 3753 """Reports for missing stl includes. 3754 3755 This function will output warnings to make sure you are including the headers 3756 necessary for the stl containers and functions that you use. We only give one 3757 reason to include a header. For example, if you use both equal_to<> and 3758 less<> in a .h file, only one (the latter in the file) of these will be 3759 reported as a reason to include the <functional>. 3760 3761 Args: 3762 filename: The name of the current file. 3763 clean_lines: A CleansedLines instance containing the file. 3764 include_state: An _IncludeState instance. 3765 error: The function to call with any errors found. 3766 """ 3767 required = {} # A map of header name to line_number and the template entity. 3768 # Example of required: { '<functional>': (1219, 'less<>') } 3769 3770 for line_number in xrange(clean_lines.num_lines()): 3771 line = clean_lines.elided[line_number] 3772 if not line or line[0] == '#': 3773 continue 3774 3775 # String is special -- it is a non-templatized type in STL. 3776 if _RE_PATTERN_STRING.search(line): 3777 required['<string>'] = (line_number, 'string') 3778 3779 for pattern, template, header in _re_pattern_algorithm_header: 3780 if pattern.search(line): 3781 required[header] = (line_number, template) 3782 3783 # The following function is just a speed up, no semantics are changed. 3784 if not '<' in line: # Reduces the cpu time usage by skipping lines. 3785 continue 3786 3787 for pattern, template, header in _re_pattern_templates: 3788 if pattern.search(line): 3789 required[header] = (line_number, template) 3790 3791 # The policy is that if you #include something in foo.h you don't need to 3792 # include it again in foo.cpp. Here, we will look at possible includes. 3793 # Let's copy the include_state so it is only messed up within this function. 3794 include_state = include_state.copy() 3795 3796 # Did we find the header for this file (if any) and succesfully load it? 3797 header_found = False 3798 3799 # Use the absolute path so that matching works properly. 3800 abs_filename = os.path.abspath(filename) 3801 3802 # For Emacs's flymake. 3803 # If cpp_style is invoked from Emacs's flymake, a temporary file is generated 3804 # by flymake and that file name might end with '_flymake.cpp'. In that case, 3805 # restore original file name here so that the corresponding header file can be 3806 # found. 3807 # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h' 3808 # instead of 'foo_flymake.h' 3809 abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename) 3810 3811 # include_state is modified during iteration, so we iterate over a copy of 3812 # the keys. 3813 for header in include_state.keys(): #NOLINT 3814 (same_module, common_path) = files_belong_to_same_module(abs_filename, header) 3815 fullpath = common_path + header 3816 if same_module and update_include_state(fullpath, include_state): 3817 header_found = True 3818 3819 # If we can't find the header file for a .cpp, assume it's because we don't 3820 # know where to look. In that case we'll give up as we're not sure they 3821 # didn't include it in the .h file. 3822 # FIXME: Do a better job of finding .h files so we are confident that 3823 # not having the .h file means there isn't one. 3824 if filename.endswith('.cpp') and not header_found: 3825 return 3826 3827 # All the lines have been processed, report the errors found. 3828 for required_header_unstripped in required: 3829 template = required[required_header_unstripped][1] 3830 if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED: 3831 headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template] 3832 if [True for header in headers if header in include_state]: 3833 continue 3834 if required_header_unstripped.strip('<>"') not in include_state: 3835 error(required[required_header_unstripped][0], 3836 'build/include_what_you_use', 4, 3837 'Add #include ' + required_header_unstripped + ' for ' + template) 3838 3839 3840 def process_line(filename, file_extension, 3841 clean_lines, line, include_state, function_state, 3842 class_state, file_state, enum_state, error): 3843 """Processes a single line in the file. 3844 3845 Args: 3846 filename: Filename of the file that is being processed. 3847 file_extension: The extension (dot not included) of the file. 3848 clean_lines: An array of strings, each representing a line of the file, 3849 with comments stripped. 3850 line: Number of line being processed. 3851 include_state: An _IncludeState instance in which the headers are inserted. 3852 function_state: A _FunctionState instance which counts function lines, etc. 3853 class_state: A _ClassState instance which maintains information about 3854 the current stack of nested class declarations being parsed. 3855 file_state: A _FileState instance which maintains information about 3856 the state of things in the file. 3857 enum_state: A _EnumState instance which maintains an enum declaration 3858 state. 3859 error: A callable to which errors are reported, which takes arguments: 3860 line number, error level, and message 3861 3862 """ 3863 raw_lines = clean_lines.raw_lines 3864 detect_functions(clean_lines, line, function_state, error) 3865 check_for_function_lengths(clean_lines, line, function_state, error) 3866 if search(r'\bNOLINT\b', raw_lines[line]): # ignore nolint lines 3867 return 3868 if match(r'\s*\b__asm\b', raw_lines[line]): # Ignore asm lines as they format differently. 3869 return 3870 check_function_definition(filename, file_extension, clean_lines, line, function_state, error) 3871 check_pass_ptr_usage(clean_lines, line, function_state, error) 3872 check_for_leaky_patterns(clean_lines, line, function_state, error) 3873 check_for_multiline_comments_and_strings(clean_lines, line, error) 3874 check_style(clean_lines, line, file_extension, class_state, file_state, enum_state, error) 3875 check_language(filename, clean_lines, line, file_extension, include_state, 3876 file_state, error) 3877 check_for_non_standard_constructs(clean_lines, line, class_state, error) 3878 check_posix_threading(clean_lines, line, error) 3879 check_invalid_increment(clean_lines, line, error) 3880 check_conditional_and_loop_bodies_for_brace_violations(clean_lines, line, error) 3881 3882 def _process_lines(filename, file_extension, lines, error, min_confidence): 3883 """Performs lint checks and reports any errors to the given error function. 3884 3885 Args: 3886 filename: Filename of the file that is being processed. 3887 file_extension: The extension (dot not included) of the file. 3888 lines: An array of strings, each representing a line of the file, with the 3889 last element being empty if the file is termined with a newline. 3890 error: A callable to which errors are reported, which takes 4 arguments: 3891 """ 3892 lines = (['// marker so line numbers and indices both start at 1'] + lines + 3893 ['// marker so line numbers end in a known way']) 3894 3895 include_state = _IncludeState() 3896 function_state = _FunctionState(min_confidence) 3897 class_state = _ClassState() 3898 3899 check_for_copyright(lines, error) 3900 3901 if file_extension == 'h': 3902 check_for_header_guard(filename, lines, error) 3903 3904 remove_multi_line_comments(lines, error) 3905 clean_lines = CleansedLines(lines) 3906 file_state = _FileState(clean_lines, file_extension) 3907 enum_state = _EnumState() 3908 for line in xrange(clean_lines.num_lines()): 3909 process_line(filename, file_extension, clean_lines, line, 3910 include_state, function_state, class_state, file_state, 3911 enum_state, error) 3912 class_state.check_finished(error) 3913 3914 check_for_include_what_you_use(filename, clean_lines, include_state, error) 3915 3916 # We check here rather than inside process_line so that we see raw 3917 # lines rather than "cleaned" lines. 3918 check_for_unicode_replacement_characters(lines, error) 3919 3920 check_for_new_line_at_eof(lines, error) 3921 3922 3923 class CppChecker(object): 3924 3925 """Processes C++ lines for checking style.""" 3926 3927 # This list is used to-- 3928 # 3929 # (1) generate an explicit list of all possible categories, 3930 # (2) unit test that all checked categories have valid names, and 3931 # (3) unit test that all categories are getting unit tested. 3932 # 3933 categories = set([ 3934 'build/class', 3935 'build/deprecated', 3936 'build/endif_comment', 3937 'build/forward_decl', 3938 'build/header_guard', 3939 'build/include', 3940 'build/include_order', 3941 'build/include_what_you_use', 3942 'build/namespaces', 3943 'build/printf_format', 3944 'build/storage_class', 3945 'build/using_std', 3946 'legal/copyright', 3947 'readability/braces', 3948 'readability/casting', 3949 'readability/check', 3950 'readability/comparison_to_zero', 3951 'readability/constructors', 3952 'readability/control_flow', 3953 'readability/enum_casing', 3954 'readability/fn_size', 3955 'readability/function', 3956 'readability/multiline_comment', 3957 'readability/multiline_string', 3958 'readability/parameter_name', 3959 'readability/naming', 3960 'readability/naming/underscores', 3961 'readability/null', 3962 'readability/pass_ptr', 3963 'readability/streams', 3964 'readability/todo', 3965 'readability/utf8', 3966 'readability/webkit_export', 3967 'runtime/arrays', 3968 'runtime/bitfields', 3969 'runtime/casting', 3970 'runtime/ctype_function', 3971 'runtime/explicit', 3972 'runtime/init', 3973 'runtime/int', 3974 'runtime/invalid_increment', 3975 'runtime/leaky_pattern', 3976 'runtime/max_min_macros', 3977 'runtime/memset', 3978 'runtime/printf', 3979 'runtime/printf_format', 3980 'runtime/references', 3981 'runtime/rtti', 3982 'runtime/sizeof', 3983 'runtime/string', 3984 'runtime/threadsafe_fn', 3985 'runtime/unsigned', 3986 'runtime/virtual', 3987 'whitespace/blank_line', 3988 'whitespace/braces', 3989 'whitespace/comma', 3990 'whitespace/comments', 3991 'whitespace/declaration', 3992 'whitespace/end_of_line', 3993 'whitespace/ending_newline', 3994 'whitespace/indent', 3995 'whitespace/line_length', 3996 'whitespace/newline', 3997 'whitespace/operators', 3998 'whitespace/parens', 3999 'whitespace/semicolon', 4000 'whitespace/tab', 4001 'whitespace/todo', 4002 ]) 4003 4004 fs = None 4005 4006 def __init__(self, file_path, file_extension, handle_style_error, 4007 min_confidence, fs=None): 4008 """Create a CppChecker instance. 4009 4010 Args: 4011 file_extension: A string that is the file extension, without 4012 the leading dot. 4013 4014 """ 4015 self.file_extension = file_extension 4016 self.file_path = file_path 4017 self.handle_style_error = handle_style_error 4018 self.min_confidence = min_confidence 4019 CppChecker.fs = fs or FileSystem() 4020 4021 # Useful for unit testing. 4022 def __eq__(self, other): 4023 """Return whether this CppChecker instance is equal to another.""" 4024 if self.file_extension != other.file_extension: 4025 return False 4026 if self.file_path != other.file_path: 4027 return False 4028 if self.handle_style_error != other.handle_style_error: 4029 return False 4030 if self.min_confidence != other.min_confidence: 4031 return False 4032 4033 return True 4034 4035 # Useful for unit testing. 4036 def __ne__(self, other): 4037 # Python does not automatically deduce __ne__() from __eq__(). 4038 return not self.__eq__(other) 4039 4040 def check(self, lines): 4041 _process_lines(self.file_path, self.file_extension, lines, 4042 self.handle_style_error, self.min_confidence) 4043 4044 4045 # FIXME: Remove this function (requires refactoring unit tests). 4046 def process_file_data(filename, file_extension, lines, error, min_confidence, fs=None): 4047 checker = CppChecker(filename, file_extension, error, min_confidence, fs) 4048 checker.check(lines) 4049