#!/usr/bin/perl -w
#
# Clean one or more patch files (unified diff format) of stealth whitespace
# in the lines they add.  Each file is rewritten in place.
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: cleanpatch [-width #] files...
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;     # 0 disables the line-width check

# Program name used as a prefix in diagnostics
my $name = basename($0);

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string and returns the cleaned string.  Spaces are held back
# until the next character is known: if it is a tab, the spaces are
# replaced by however many tabs reach the same tab stop; otherwise the
# spaces are emitted unchanged.
sub clean_space_tabs($)
{
    no bytes;           # Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';       # output built so far
    my $pos = 0;        # column reached by $lo
    my $nsp = 0;        # spaces seen but not yet emitted

    for my $c (split(//, $li)) {
        if ($c eq "\t") {
            # Column the tab lands on, counting the pending spaces
            my $npos = ($pos + $nsp + 8) & ~7;
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq ' ') {
            $nsp++;
        } else {
            # Flush the pending spaces, then emit the character itself
            $lo  .= (' ' x $nsp) . $c;
            $pos += $nsp;
            $nsp  = 0;
            if ($c eq "\n" || $c eq "\r") {
                $pos = 0;
            } else {
                $pos++;
            }
        }
    }

    $lo .= ' ' x $nsp;
    return $lo;
}

# Compute the visual width of a string.
#
# Tabs advance to the next multiple of 8; a newline starts a new line.
# Returns the width of the widest line found in the string.
sub strwidth($)
{
    no bytes;           # Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;       # column on the current line
    my $mlen = 0;       # widest line seen so far

    for my $c (split(//, $li)) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Read a patch from a filehandle and clean the lines it adds.
#
#   $fh - filehandle positioned at the start of the patch
#   $f  - file name, used only in diagnostics
#
# Returns the list ($err, $changed, \@lines):
#   $err     - true if the patch is malformed (a message has been printed)
#   $changed - true if the cleaned text differs from the input
#   \@lines  - the cleaned lines; incomplete when $err is true
#
# Lines outside hunks pass through untouched.  Inside a hunk only '+'
# lines are modified: trailing whitespace is removed and space-tab
# sequences are cleaned.  Blank '+' lines at the very end of a hunk are
# dropped and the hunk header's added-line count is reduced to match.
sub _clean_patch
{
    my ($fh, $f) = @_;

    my @lines      = ();    # cleaned output
    my @hunk_lines = ();    # header + body of the hunk being collected
    my @hdr        = ();    # parsed fields of that hunk's header
    my $in_hunk    = 0;
    my $changed    = 0;
    my $lineno     = 0;
    my $minus_lines = 0;    # '-'/context lines still expected
    my $plus_lines  = 0;    # '+'/context lines still expected

    while (defined(my $line = <$fh>)) {
        $lineno++;

        if (!$in_hunk) {
            # A count may be omitted from a hunk header, meaning 1.
            if ($line =~
                /^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/) {
                # $tail doesn't include the final newline
                my ($mstart, $mlin, $pstart, $plin, $tail) =
                    ($1, $2, $3, $4, $5);
                $mlin = 1 unless defined($mlin);
                $plin = 1 unless defined($plin);

                if ($mlin || $plin) {
                    @hdr = ($mstart, $mlin, $pstart, $plin, $tail);
                    ($minus_lines, $plus_lines) = ($mlin, $plin);
                    @hunk_lines = ($line);
                    $in_hunk = 1;
                    next;
                }
                # A header announcing no lines at all has no body to
                # collect; fall through and keep the line as it is.
            }
            push(@lines, $line);
            next;
        }

        # We're in a hunk

        if ($line =~ /^\+/) {
            $plus_lines--;

            my $text = substr($line, 1);
            $text =~ s/[ \t\r]*$//;     # Remove trailing spaces
            $text = clean_space_tabs($text);

            my $l_width = strwidth($text);
            if ($max_width && $l_width > $max_width) {
                print STDERR
                    "$f:$lineno: adds line exceeds $max_width ",
                    "characters ($l_width)\n";
            }

            # Compare content, not length: turning " \t" into "\t\t"
            # changes the line without changing its byte count.
            $changed = 1 if (('+' . $text) ne $line);
            push(@hunk_lines, '+' . $text);
        } elsif ($line =~ /^\-/) {
            $minus_lines--;
            push(@hunk_lines, $line);
        } elsif ($line =~ /^ /) {
            $plus_lines--;
            $minus_lines--;
            push(@hunk_lines, $line);
        } else {
            print STDERR "$name: $f: malformed patch\n";
            return (1, $changed, \@lines);
        }

        if ($plus_lines < 0 || $minus_lines < 0) {
            print STDERR "$name: $f: malformed patch\n";
            return (1, $changed, \@lines);
        }
        next unless ($plus_lines == 0 && $minus_lines == 0);

        # End of a hunk.  Process this hunk.
        my @h    = ();
        my $adj  = 0;       # trailing blank '+' lines dropped
        my $done = 0;       # set once a context or '+' line is kept

        # Walk the body backwards (index 0 is the header).
        for (my $i = $#hunk_lines; $i > 0; $i--) {
            my $hl = $hunk_lines[$i];
            if (!$done && $hl eq "+\n") {
                $adj++;     # Skip this line
            } else {
                $done = 1 if ($hl =~ /^[ +]/);
                unshift(@h, $hl);
            }
        }

        my $hdr_line = $hunk_lines[0];  # Hunk header
        @hunk_lines = ();               # Free memory

        if ($adj) {
            my ($mstart, $mlin, $pstart, $plin, $tail) = @hdr;
            $hdr_line = sprintf('@@ -%d,%d +%d,%d @@%s' . "\n",
                                $mstart, $mlin, $pstart, $plin - $adj,
                                $tail);
            $changed = 1;
        }

        # Transfer to the output array
        push(@lines, $hdr_line, @h);
        $in_hunk = 0;
    }

    if ($in_hunk) {
        # Input ended before the hunk had all its announced lines
        print STDERR "$name: $f: malformed patch\n";
        return (1, $changed, \@lines);
    }

    return (0, $changed, \@lines);
}

my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if ($arg eq '-width' || $arg eq '-w') {
            $max_width = shift(@ARGV)+0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f (@files) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    seek($fh, 0, 0);

    my ($err, $changed, $lines) = _clean_patch($fh, $f);

    # Only write to the file if changed; a malformed patch is left alone
    if (!$err && $changed) {
        seek($fh, 0, 0);
        print {$fh} @$lines
            or die "$name: Failed to write modified file: $f: $!\n";

        my $where = tell($fh);
        if (!defined($where) || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    if (!close($fh)) {
        print STDERR "$name: Cannot close file: $f: $!\n";
    }
}