Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/perl -w
      2 #
      3 # Clean a patch file -- or directory of patch files -- of stealth whitespace.
      4 # WARNING: this can be a highly destructive operation.  Use with caution.
      5 #
      6 
      7 use bytes;
      8 use File::Basename;
      9 
     10 # Default options
     11 $max_width = 79;
     12 
     13 # Clean up space-tab sequences, either by removing spaces or
     14 # replacing them with tabs.
     15 sub clean_space_tabs($)
     16 {
     17     no bytes;			# Tab alignment depends on characters
     18 
     19     my($li) = @_;
     20     my($lo) = '';
     21     my $pos = 0;
     22     my $nsp = 0;
     23     my($i, $c);
     24 
     25     for ($i = 0; $i < length($li); $i++) {
     26 	$c = substr($li, $i, 1);
     27 	if ($c eq "\t") {
     28 	    my $npos = ($pos+$nsp+8) & ~7;
     29 	    my $ntab = ($npos >> 3) - ($pos >> 3);
     30 	    $lo .= "\t" x $ntab;
     31 	    $pos = $npos;
     32 	    $nsp = 0;
     33 	} elsif ($c eq "\n" || $c eq "\r") {
     34 	    $lo .= " " x $nsp;
     35 	    $pos += $nsp;
     36 	    $nsp = 0;
     37 	    $lo .= $c;
     38 	    $pos = 0;
     39 	} elsif ($c eq " ") {
     40 	    $nsp++;
     41 	} else {
     42 	    $lo .= " " x $nsp;
     43 	    $pos += $nsp;
     44 	    $nsp = 0;
     45 	    $lo .= $c;
     46 	    $pos++;
     47 	}
     48     }
     49     $lo .= " " x $nsp;
     50     return $lo;
     51 }
     52 
     53 # Compute the visual width of a string
     54 sub strwidth($) {
     55     no bytes;			# Tab alignment depends on characters
     56 
     57     my($li) = @_;
     58     my($c, $i);
     59     my $pos = 0;
     60     my $mlen = 0;
     61 
     62     for ($i = 0; $i < length($li); $i++) {
     63 	$c = substr($li,$i,1);
     64 	if ($c eq "\t") {
     65 	    $pos = ($pos+8) & ~7;
     66 	} elsif ($c eq "\n") {
     67 	    $mlen = $pos if ($pos > $mlen);
     68 	    $pos = 0;
     69 	} else {
     70 	    $pos++;
     71 	}
     72     }
     73 
     74     $mlen = $pos if ($pos > $mlen);
     75     return $mlen;
     76 }
     77 
     78 $name = basename($0);
     79 
     80 @files = ();
     81 
     82 while (defined($a = shift(@ARGV))) {
     83     if ($a =~ /^-/) {
     84 	if ($a eq '-width' || $a eq '-w') {
     85 	    $max_width = shift(@ARGV)+0;
     86 	} else {
     87 	    print STDERR "Usage: $name [-width #] files...\n";
     88 	    exit 1;
     89 	}
     90     } else {
     91 	push(@files, $a);
     92     }
     93 }
     94 
     95 foreach $f ( @files ) {
     96     print STDERR "$name: $f\n";
     97 
     98     if (! -f $f) {
     99 	print STDERR "$f: not a file\n";
    100 	next;
    101     }
    102 
    103     if (!open(FILE, '+<', $f)) {
    104 	print STDERR "$name: Cannot open file: $f: $!\n";
    105 	next;
    106     }
    107 
    108     binmode FILE;
    109 
    110     # First, verify that it is not a binary file; consider any file
    111     # with a zero byte to be a binary file.  Is there any better, or
    112     # additional, heuristic that should be applied?
    113     $is_binary = 0;
    114 
    115     while (read(FILE, $data, 65536) > 0) {
    116 	if ($data =~ /\0/) {
    117 	    $is_binary = 1;
    118 	    last;
    119 	}
    120     }
    121 
    122     if ($is_binary) {
    123 	print STDERR "$name: $f: binary file\n";
    124 	next;
    125     }
    126 
    127     seek(FILE, 0, 0);
    128 
    129     $in_bytes = 0;
    130     $out_bytes = 0;
    131     $lineno = 0;
    132 
    133     @lines  = ();
    134 
    135     $in_hunk = 0;
    136     $err = 0;
    137 
    138     while ( defined($line = <FILE>) ) {
    139 	$lineno++;
    140 	$in_bytes += length($line);
    141 
    142 	if (!$in_hunk) {
    143 	    if ($line =~
    144 		/^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@/) {
    145 		$minus_lines = $2;
    146 		$plus_lines = $4;
    147 		if ($minus_lines || $plus_lines) {
    148 		    $in_hunk = 1;
    149 		    @hunk_lines = ($line);
    150 		}
    151 	    } else {
    152 		push(@lines, $line);
    153 		$out_bytes += length($line);
    154 	    }
    155 	} else {
    156 	    # We're in a hunk
    157 
    158 	    if ($line =~ /^\+/) {
    159 		$plus_lines--;
    160 
    161 		$text = substr($line, 1);
    162 		$text =~ s/[ \t\r]*$//;		# Remove trailing spaces
    163 		$text = clean_space_tabs($text);
    164 
    165 		$l_width = strwidth($text);
    166 		if ($max_width && $l_width > $max_width) {
    167 		    print STDERR
    168 			"$f:$lineno: adds line exceeds $max_width ",
    169 			"characters ($l_width)\n";
    170 		}
    171 
    172 		push(@hunk_lines, '+'.$text);
    173 	    } elsif ($line =~ /^\-/) {
    174 		$minus_lines--;
    175 		push(@hunk_lines, $line);
    176 	    } elsif ($line =~ /^ /) {
    177 		$plus_lines--;
    178 		$minus_lines--;
    179 		push(@hunk_lines, $line);
    180 	    } else {
    181 		print STDERR "$name: $f: malformed patch\n";
    182 		$err = 1;
    183 		last;
    184 	    }
    185 
    186 	    if ($plus_lines < 0 || $minus_lines < 0) {
    187 		print STDERR "$name: $f: malformed patch\n";
    188 		$err = 1;
    189 		last;
    190 	    } elsif ($plus_lines == 0 && $minus_lines == 0) {
    191 		# End of a hunk.  Process this hunk.
    192 		my $i;
    193 		my $l;
    194 		my @h = ();
    195 		my $adj = 0;
    196 		my $done = 0;
    197 
    198 		for ($i = scalar(@hunk_lines)-1; $i > 0; $i--) {
    199 		    $l = $hunk_lines[$i];
    200 		    if (!$done && $l eq "+\n") {
    201 			$adj++; # Skip this line
    202 		    } elsif ($l =~ /^[ +]/) {
    203 			$done = 1;
    204 			unshift(@h, $l);
    205 		    } else {
    206 			unshift(@h, $l);
    207 		    }
    208 		}
    209 
    210 		$l = $hunk_lines[0];  # Hunk header
    211 		undef @hunk_lines;    # Free memory
    212 
    213 		if ($adj) {
    214 		    die unless
    215 			($l =~ /^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@(.*)$/);
    216 		    my $mstart = $1;
    217 		    my $mlin = $2;
    218 		    my $pstart = $3;
    219 		    my $plin = $4;
    220 		    my $tail = $5; # doesn't include the final newline
    221 
    222 		    $l = sprintf("@@ -%d,%d +%d,%d @@%s\n",
    223 				 $mstart, $mlin, $pstart, $plin-$adj,
    224 				 $tail);
    225 		}
    226 		unshift(@h, $l);
    227 
    228 		# Transfer to the output array
    229 		foreach $l (@h) {
    230 		    $out_bytes += length($l);
    231 		    push(@lines, $l);
    232 		}
    233 
    234 		$in_hunk = 0;
    235 	    }
    236 	}
    237     }
    238 
    239     if ($in_hunk) {
    240 	print STDERR "$name: $f: malformed patch\n";
    241 	$err = 1;
    242     }
    243 
    244     if (!$err) {
    245 	if ($in_bytes != $out_bytes) {
    246 	    # Only write to the file if changed
    247 	    seek(FILE, 0, 0);
    248 	    print FILE @lines;
    249 
    250 	    if ( !defined($where = tell(FILE)) ||
    251 		 !truncate(FILE, $where) ) {
    252 		die "$name: Failed to truncate modified file: $f: $!\n";
    253 	    }
    254 	}
    255     }
    256 
    257     close(FILE);
    258 }
    259