0001 #!/usr/bin/env perl
0002 # SPDX-License-Identifier: GPL-2.0
0003 #
0004 # Clean a patch file -- or directory of patch files -- of stealth whitespace.
0005 # WARNING: this can be a highly destructive operation. Use with caution.
0006 #
0007
0008 use warnings;
0009 use bytes;
0010 use File::Basename;
0011
# Default options
#
# $max_width is the visual width above which an added patch line is
# reported on STDERR.  A value of 0 disables the report.  It can be
# overridden on the command line with -width (or -w).
$max_width = 79;
0014
# Rewrite runs of spaces that are immediately followed by a tab.
#
# Spaces are held back until the next character is known.  If that
# character is a tab, the held spaces are discarded and as many tabs are
# emitted as are needed to reach the tab stop (every 8 columns) that the
# original "spaces + tab" sequence reached.  If it is anything else, the
# held spaces are emitted unchanged.  Newline and carriage return reset
# the column counter.
#
# Takes the string to clean and returns the cleaned string.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($input) = @_;
    my $output  = '';
    my $column  = 0;            # column reached by what has been emitted
    my $pending = 0;            # spaces seen but not yet emitted

    foreach my $ch (split(//, $input)) {
        if ($ch eq ' ') {
            $pending++;
            next;
        }

        if ($ch eq "\t") {
            # Tab stop the original sequence would have landed on
            my $target = ($column + $pending + 8) & ~7;

            $output .= "\t" x (($target >> 3) - ($column >> 3));
            $column  = $target;
            $pending = 0;
            next;
        }

        # Any other character: the held spaces are real, flush them
        $output .= ' ' x $pending;
        $column += $pending;
        $pending = 0;

        $output .= $ch;
        if ($ch eq "\n" || $ch eq "\r") {
            $column = 0;
        } else {
            $column++;
        }
    }

    # Spaces at the very end of the string are kept as they are
    return $output . (' ' x $pending);
}
0054
# Return the visual width of a string: the largest column reached by any
# of its newline-separated lines.  A tab advances to the next multiple
# of 8 columns; every other character except newline counts as one
# column.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my ($text) = @_;
    my $column = 0;             # width of the line currently being scanned
    my $widest = 0;             # widest completed line so far

    foreach my $ch (split(//, $text)) {
        if ($ch eq "\n") {
            $widest = $column if ($column > $widest);
            $column = 0;
        } elsif ($ch eq "\t") {
            $column = ($column + 8) & ~7;
        } else {
            $column++;
        }
    }

    # The last line may not be newline-terminated
    return ($column > $widest) ? $column : $widest;
}
0079
# Program name used as a prefix in diagnostics
$name = basename($0);

# Files named on the command line, in the order given
@files = ();

# Parse the command line.  Anything that does not start with "-" is a
# file to process.  The only option is -width (or -w), which takes the
# maximum line width as the following argument.  An unknown option, or a
# missing or non-numeric width, prints the usage line and exits with
# status 1 instead of silently falling back to a width of 0.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg !~ /^-/) {
        push(@files, $arg);
        next;
    }

    my $width;
    if ($arg eq '-width' || $arg eq '-w') {
        $width = shift(@ARGV);
    }

    if (!defined($width) || $width !~ /^[0-9]+$/) {
        print STDERR "Usage: $name [-width #] files...\n";
        exit 1;
    }

    $max_width = $width + 0;
}
0096
# Process every file named on the command line.
#
# Each file is read as a unified diff.  Lines outside hunks are copied
# through untouched.  Inside a hunk, added ("+") lines have trailing
# whitespace removed and space-before-tab sequences cleaned; removed
# ("-") and context (" ") lines are kept as they are.  Blank added lines
# at the end of a hunk are dropped and the hunk header's "+" line count
# is adjusted to match.  The file is rewritten in place only when the
# patch parsed cleanly and something actually changed.
foreach my $f (@files) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if (index($data, "\0") >= 0) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    seek($fh, 0, 0);

    my $lineno     = 0;
    my @lines      = ();        # complete output, one element per line
    my @hunk_lines = ();        # header and body of the hunk being read
    my $in_hunk    = 0;
    my $err        = 0;
    my $changed    = 0;         # true once the output differs from the input
    my $minus_lines;            # "-"/context lines still expected in hunk
    my $plus_lines;             # "+"/context lines still expected in hunk

    while (defined(my $line = <$fh>)) {
        $lineno++;

        if (!$in_hunk) {
            if ($line =~
                /^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@/) {
                $minus_lines = $2;
                $plus_lines  = $4;
                if ($minus_lines || $plus_lines) {
                    $in_hunk = 1;
                    @hunk_lines = ($line);
                } else {
                    # A hunk with no lines on either side is dropped
                    # from the output, so the file must be rewritten.
                    $changed = 1;
                }
            } else {
                push(@lines, $line);
            }
            next;
        }

        # We're in a hunk

        if ($line =~ /^\+/) {
            $plus_lines--;

            my $text = substr($line, 1);
            $text =~ s/[ \t\r]*$//;     # Remove trailing spaces
            $text = clean_space_tabs($text);

            my $l_width = strwidth($text);
            if ($max_width && $l_width > $max_width) {
                print STDERR
                    "$f:$lineno: adds line exceeds $max_width ",
                    "characters ($l_width)\n";
            }

            # Compare contents, not lengths: replacing a space with a
            # tab can change a line without changing its byte count.
            my $cleaned = '+' . $text;
            $changed = 1 if ($cleaned ne $line);

            push(@hunk_lines, $cleaned);
        } elsif ($line =~ /^\-/) {
            $minus_lines--;
            push(@hunk_lines, $line);
        } elsif ($line =~ /^ /) {
            # Context lines count against both sides of the hunk
            $plus_lines--;
            $minus_lines--;
            push(@hunk_lines, $line);
        } else {
            print STDERR "$name: $f: malformed patch\n";
            $err = 1;
            last;
        }

        if ($plus_lines < 0 || $minus_lines < 0) {
            print STDERR "$name: $f: malformed patch\n";
            $err = 1;
            last;
        }

        next unless ($plus_lines == 0 && $minus_lines == 0);

        # End of a hunk.  Process this hunk.
        my $header = shift(@hunk_lines);
        my @body   = ();
        my $adj    = 0;         # number of blank added lines dropped
        my $done   = 0;         # set once a non-blank "+"/" " line is seen

        # Walk the body backwards, dropping blank added lines until the
        # first context line or non-blank added line is reached.
        # Removed lines do not stop the scan.
        foreach my $hl (reverse(@hunk_lines)) {
            if (!$done && $hl eq "+\n") {
                $adj++;         # Skip this line
                next;
            }
            $done = 1 if ($hl =~ /^[ +]/);
            unshift(@body, $hl);
        }

        @hunk_lines = ();       # Free memory

        if ($adj) {
            # Lines were dropped: rewrite the header's "+" line count
            $header =~
                /^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@(.*)$/
                or die "$name: $f: cannot parse hunk header\n";
            my ($mstart, $mlin, $pstart, $plin) = ($1, $2, $3, $4);
            my $tail = $5;      # doesn't include the final newline

            $header = sprintf('@@ -%d,%d +%d,%d @@%s',
                              $mstart, $mlin, $pstart, $plin - $adj,
                              $tail) . "\n";
            $changed = 1;
        }

        # Transfer to the output array
        push(@lines, $header, @body);

        $in_hunk = 0;
    }

    # Input ended in the middle of a hunk.  Skipped when the loop was
    # already aborted, so the message is not printed twice.
    if ($in_hunk && !$err) {
        print STDERR "$name: $f: malformed patch\n";
        $err = 1;
    }

    if (!$err && $changed) {
        # Only write to the file if changed
        seek($fh, 0, 0);
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # The cleaned patch is never longer than the original, so cut
        # off whatever is left of the old contents.  tell() reports
        # failure as -1.
        my $where = tell($fh);
        if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only surface at close
    close($fh)
        or die "$name: Failed to close file: $f: $!\n";
}