#!/usr/bin/perl -w
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Use: echo filename1.cc ... | find_copyrights.pl
#   or: find_copyrights.pl list_file
#   or: find_files.pl ... | find_copyrights.pl
#
# File names are read one per line. For every file one line is printed:
# the file name, a tab, and then either "GENERATED FILE", the " / "-joined
# sorted list of the distinct copyright statements found in the file, or
# "*No copyright*".

use strict;
use warnings;
use File::Basename;

sub check_is_generated_file($);
sub start_copyright_parsing();

my $progname = basename($0);

# Only this many leading lines of a file are handed to
# check_is_generated_file().
my $generated_file_scan_boundary = 25;

while (my $file = <>) {
  chomp $file;
  my $file_header = '';
  my %copyrights;
  # Three-argument open with a lexical handle, so that the file name can
  # never be interpreted as an open mode or a pipe command.
  open(my $fh, '<', $file)
      or die "$progname: Unable to access $file: $!\n";
  my $parse_copyright = start_copyright_parsing();
  # Explicit counter instead of $., which depends on which handle was read
  # last (this loop is nested inside the <> loop).
  my $line_number = 0;
  while (my $line = <$fh>) {
    ++$line_number;
    if ($line_number <= $generated_file_scan_boundary) {
      $file_header .= $line;
    }
    my $copyright_match = $parse_copyright->($line, $line_number);
    if ($copyright_match) {
      # Keyed by the lower-cased text: statements differing only in case are
      # reported once, using the spelling seen last.
      $copyrights{lc($copyright_match)} = $copyright_match;
    }
  }
  close($fh);

  my $copyright = join(" / ", sort values %copyrights);
  my $result = check_is_generated_file($file_header)
      ? "GENERATED FILE"
      : ($copyright || "*No copyright*");
  print "$file\t$result\n";
}

# Tells whether the given file header text carries a "generated file" marker.
#
# Args:
#   $_[0]: the text to inspect (the main loop passes the first
#          $generated_file_scan_boundary lines of a file).
# Returns:
#   1 if a marker is found, 0 otherwise.
sub check_is_generated_file($) {
  my $license = uc($_[0]);
  # Remove Python multiline comments to avoid false positives
  if (index($license, '"""') != -1) {
    $license =~ s/"""[^"]*(?:"""|$)//mg;
  }
  if (index($license, "'''") != -1) {
    $license =~ s/'''[^']*(?:'''|$)//mg;
  }
  # Quick checks using index.
  if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
    return 1;
  }
  if (index($license, 'DO NOT EDIT') != -1 ||
      index($license, 'DO NOT DELETE') != -1 ||
      index($license, 'GENERATED') != -1) {
    # The alternatives are joined into one pattern here. Splitting a regex
    # literal with "' . '" does not concatenate anything: the quotes and the
    # dot would become part of the pattern and the alternatives around them
    # could never match.
    my $markers = join('|',
        'All changes made in this file will be lost',
        'DO NOT (?:EDIT|delete this file)',
        'Generated (?:at|automatically|data)',
        'Automatically generated',
        '\Wgenerated\s+(?:\w+\s+)*file\W');
    return ($license =~ /(?:$markers)/i) ? 1 : 0;
  }
  return 0;
}

# Tells whether $_[0] follows $_[1] at a distance of at most $_[2], i.e.
# whether 0 <= $_[0] - $_[1] <= $_[2].
sub are_within_increasing_progression($$$) {
  my $delta = $_[0] - $_[1];
  return $delta >= 0 && $delta <= $_[2];
}

# Creates a line-by-line copyright parser for a single file.
#
# Returns:
#   A code reference taking (line text, line number). It returns the
#   copyright statement found on that line with the copyright indicator
#   words removed, or '' if there is none. The parser remembers on which
#   lines '(a)' and '(b)' were last seen inside C++ comments in order to
#   recognize '(c)' used as a list item, so a fresh parser must be created
#   for every file and lines must be fed in order.
sub start_copyright_parsing() {
  my $max_line_numbers_proximity = 3;
  # Set up the defaults the way that proximity checks will not succeed.
  my $last_a_item_line_number = -200;
  my $last_b_item_line_number = -100;

  # The copyright sign: either its UTF-8 byte sequence or a single 0xA9
  # byte. The lookbehind rejects a 0xA9 that is merely the last byte of
  # another UTF-8 character (e.g. "e acute" is 0xC3 0xA9); a preceding 0xC2
  # is allowed because the leading \W of the full pattern may consume it.
  my $copyright_sign_regex =
      '(?:\xc2\xa9|(?<![\x80-\xbf\xc3-\xff])\x{00a9})';
  my $copyright_indicator_regex =
      '(?:copyright|copr\.|' . $copyright_sign_regex . '|\(c\))';
  my $full_copyright_indicator_regex =
      sprintf '(?:\W|^)%s(?::\s*|\s+)(\w.*)$', $copyright_indicator_regex;
  my $copyright_disindicator_regex =
      '\b(?:info(?:rmation)?|notice|and|or)\b';

  # The patterns never change, so they are compiled once per parser rather
  # than once per line.
  my $sign_matcher = qr/$copyright_sign_regex/;
  my $indicator_matcher = qr/$copyright_indicator_regex/i;
  my $statement_matcher = qr/$full_copyright_indicator_regex/i;
  my $disindicator_matcher = qr/^\s*$copyright_disindicator_regex/i;

  return sub {
    my $line = $_[0];
    my $line_number = $_[1];

    # Remove C / C++ strings to avoid false positives.
    if (index($line, '"') != -1) {
      $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
    }

    my $uc_line = uc($line);

    # Record '(a)' and '(b)' last occurrences in C++ comments.
    my $cpp_comment_idx = index($uc_line, '//');
    if ($cpp_comment_idx != -1) {
      if (index($uc_line, '(A)') > $cpp_comment_idx) {
        $last_a_item_line_number = $line_number;
      }
      if (index($uc_line, '(B)') > $cpp_comment_idx) {
        $last_b_item_line_number = $line_number;
      }
    }

    # Fast bailout, uses the same patterns as the regexp. The copyright sign
    # is tested with a real pattern: a single-quoted '\x{00a9}' passed to
    # index() would look for the literal backslash sequence instead.
    if (index($uc_line, 'COPYRIGHT') == -1 &&
        index($uc_line, 'COPR.') == -1 &&
        $uc_line !~ $sign_matcher) {

      my $c_item_index = index($uc_line, '(C)');
      return '' if ($c_item_index == -1);
      # Filter out 'c' used as a list item inside C++ comments.
      # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
      if ($c_item_index > $cpp_comment_idx &&
          are_within_increasing_progression(
              $line_number,
              $last_b_item_line_number,
              $max_line_numbers_proximity) &&
          are_within_increasing_progression(
              $last_b_item_line_number,
              $last_a_item_line_number,
              $max_line_numbers_proximity)) {
        return '';
      }
    }

    my $copyright = '';
    if ($line =~ $statement_matcher) {
      my $match = $1;
      # Text such as "copyright notice" or "copyright information" is not a
      # copyright statement.
      if ($match !~ $disindicator_matcher) {
        $match =~ s/([,.])?\s*$//;         # Trailing punctuation / spaces.
        $match =~ s/$indicator_matcher//g; # Repeated indicators, e.g. '(c)'.
        $match =~ s/^\s+//;
        $match =~ s/\s{2,}/ /g;
        $match =~ s/\\@/@/g;
        $copyright = $match;
      }
    }

    return $copyright;
  }
}