Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/perl -w
      2 # Copyright 2013 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 # Use: echo filename1.cc ... | find_copyrights.pl
      7 #  or: find_copyrights.pl list_file
      8 #  or: find_files.pl ... | find_copyrights.pl
      9 
     10 use strict;
     11 use warnings;
     12 use File::Basename;
     13 
     14 sub check_is_generated_file($);
     15 sub start_copyright_parsing();
     16 
     17 my $progname = basename($0);
     18 
     19 my $generated_file_scan_boundary = 25;
     20 while (<>) {
     21     chomp;
     22     my $file = $_;
     23     my $file_header = '';
     24     my %copyrights;
     25     open (F, "<$file") or die "$progname: Unable to access $file\n";
     26     my $parse_copyright = start_copyright_parsing();
     27     while (<F>) {
     28         $file_header .= $_ unless $. > $generated_file_scan_boundary;
     29         my $copyright_match = $parse_copyright->($_, $.);
     30         if ($copyright_match) {
     31             $copyrights{lc("$copyright_match")} = "$copyright_match";
     32         }
     33     }
     34     close(F);
     35     my $copyright = join(" / ", sort values %copyrights);
     36     print "$file\t";
     37     if (check_is_generated_file($file_header)) {
     38         print "GENERATED FILE";
     39     } else {
     40         print ($copyright or "*No copyright*");
     41     }
     42     print "\n";
     43 }
     44 
     45 sub check_is_generated_file($) {
     46     my $license = uc($_[0]);
     47     # Remove Python multiline comments to avoid false positives
     48     if (index($license, '"""') != -1) {
     49         $license =~ s/"""[^"]*(?:"""|$)//mg;
     50     }
     51     if (index($license, "'''") != -1) {
     52         $license =~ s/'''[^']*(?:'''|$)//mg;
     53     }
     54     # Quick checks using index.
     55     if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
     56         return 1;
     57     }
     58     if (index($license, 'DO NOT EDIT') != -1 ||
     59         index($license, 'DO NOT DELETE') != -1 ||
     60         index($license, 'GENERATED') != -1) {
     61         return ($license =~ /(All changes made in this file will be lost' .
     62             'DO NOT (EDIT|delete this file)|Generated (at|automatically|data)' .
     63             '|Automatically generated|\Wgenerated\s+(?:\w+\s+)*file\W)/i);
     64     }
     65     return 0;
     66 }
     67 
     68 sub are_within_increasing_progression($$$) {
     69     my $delta = $_[0] - $_[1];
     70     return $delta >= 0 && $delta <= $_[2];
     71 }
     72 
     73 sub start_copyright_parsing() {
     74     my $max_line_numbers_proximity = 3;
     75     # Set up the defaults the way that proximity checks will not succeed.
     76     my $last_a_item_line_number = -200;
     77     my $last_b_item_line_number = -100;
     78 
     79     return sub {
     80         my $line = $_[0];
     81         my $line_number = $_[1];
     82 
     83         # Remove C / C++ strings to avoid false positives.
     84         if (index($line, '"') != -1) {
     85             $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
     86         }
     87 
     88         my $uc_line = uc($line);
     89 
     90         # Record '(a)' and '(b)' last occurences in C++ comments.
     91         my $cpp_comment_idx = index($uc_line, '//');
     92         if ($cpp_comment_idx != -1) {
     93             if (index($uc_line, '(A)') > $cpp_comment_idx) {
     94                 $last_a_item_line_number = $line_number;
     95             }
     96             if (index($uc_line, '(B)') > $cpp_comment_idx) {
     97                 $last_b_item_line_number = $line_number;
     98             }
     99         }
    100 
    101         # Fast bailout, uses the same patterns as the regexp.
    102         if (index($uc_line, 'COPYRIGHT') == -1 &&
    103             index($uc_line, 'COPR.') == -1 &&
    104             index($uc_line, '\x{00a9}') == -1 &&
    105             index($uc_line, '\xc2\xa9') == -1) {
    106 
    107             my $c_item_index = index($uc_line, '(C)');
    108             return '' if ($c_item_index == -1);
    109             # Filter out 'c' used as a list item inside C++ comments.
    110             # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
    111             if ($c_item_index > $cpp_comment_idx &&
    112                 are_within_increasing_progression(
    113                     $line_number,
    114                     $last_b_item_line_number,
    115                     $max_line_numbers_proximity) &&
    116                 are_within_increasing_progression(
    117                     $last_b_item_line_number,
    118                     $last_a_item_line_number,
    119                     $max_line_numbers_proximity)) {
    120                 return '';
    121             }
    122         }
    123 
    124         my $copyright_indicator_regex =
    125             '(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))';
    126         my $full_copyright_indicator_regex =
    127             sprintf '(?:\W|^)%s(?::\s*|\s+)(\w.*)$', $copyright_indicator_regex;
    128         my $copyright_disindicator_regex =
    129             '\b(?:info(?:rmation)?|notice|and|or)\b';
    130 
    131         my $copyright = '';
    132         if ($line =~ m%$full_copyright_indicator_regex%i) {
    133             my $match = $1;
    134             if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
    135                 $match =~ s/([,.])?\s*$//;
    136                 $match =~ s/$copyright_indicator_regex//ig;
    137                 $match =~ s/^\s+//;
    138                 $match =~ s/\s{2,}/ /g;
    139                 $match =~ s/\\@/@/g;
    140                 $copyright = $match;
    141             }
    142         }
    143 
    144         return $copyright;
    145     }
    146 }
    147