Back to home page

LXR

 
 

    


0001 #!/usr/bin/perl -w
0002 #
0003 # Clean a text file -- or directory of text files -- of stealth whitespace.
0004 # WARNING: this can be a highly destructive operation.  Use with caution.
0005 #
0006 
0007 use bytes;
0008 use File::Basename;
0009 
0010 # Default options
0011 $max_width = 79;
0012 
0013 # Clean up space-tab sequences, either by removing spaces or
0014 # replacing them with tabs.
0015 sub clean_space_tabs($)
0016 {
0017     no bytes;           # Tab alignment depends on characters
0018 
0019     my($li) = @_;
0020     my($lo) = '';
0021     my $pos = 0;
0022     my $nsp = 0;
0023     my($i, $c);
0024 
0025     for ($i = 0; $i < length($li); $i++) {
0026     $c = substr($li, $i, 1);
0027     if ($c eq "\t") {
0028         my $npos = ($pos+$nsp+8) & ~7;
0029         my $ntab = ($npos >> 3) - ($pos >> 3);
0030         $lo .= "\t" x $ntab;
0031         $pos = $npos;
0032         $nsp = 0;
0033     } elsif ($c eq "\n" || $c eq "\r") {
0034         $lo .= " " x $nsp;
0035         $pos += $nsp;
0036         $nsp = 0;
0037         $lo .= $c;
0038         $pos = 0;
0039     } elsif ($c eq " ") {
0040         $nsp++;
0041     } else {
0042         $lo .= " " x $nsp;
0043         $pos += $nsp;
0044         $nsp = 0;
0045         $lo .= $c;
0046         $pos++;
0047     }
0048     }
0049     $lo .= " " x $nsp;
0050     return $lo;
0051 }
0052 
0053 # Compute the visual width of a string
0054 sub strwidth($) {
0055     no bytes;           # Tab alignment depends on characters
0056 
0057     my($li) = @_;
0058     my($c, $i);
0059     my $pos = 0;
0060     my $mlen = 0;
0061 
0062     for ($i = 0; $i < length($li); $i++) {
0063     $c = substr($li,$i,1);
0064     if ($c eq "\t") {
0065         $pos = ($pos+8) & ~7;
0066     } elsif ($c eq "\n") {
0067         $mlen = $pos if ($pos > $mlen);
0068         $pos = 0;
0069     } else {
0070         $pos++;
0071     }
0072     }
0073 
0074     $mlen = $pos if ($pos > $mlen);
0075     return $mlen;
0076 }
0077 
0078 $name = basename($0);
0079 
0080 @files = ();
0081 
0082 while (defined($a = shift(@ARGV))) {
0083     if ($a =~ /^-/) {
0084     if ($a eq '-width' || $a eq '-w') {
0085         $max_width = shift(@ARGV)+0;
0086     } else {
0087         print STDERR "Usage: $name [-width #] files...\n";
0088         exit 1;
0089     }
0090     } else {
0091     push(@files, $a);
0092     }
0093 }
0094 
0095 foreach $f ( @files ) {
0096     print STDERR "$name: $f\n";
0097 
0098     if (! -f $f) {
0099     print STDERR "$f: not a file\n";
0100     next;
0101     }
0102 
0103     if (!open(FILE, '+<', $f)) {
0104     print STDERR "$name: Cannot open file: $f: $!\n";
0105     next;
0106     }
0107 
0108     binmode FILE;
0109 
0110     # First, verify that it is not a binary file; consider any file
0111     # with a zero byte to be a binary file.  Is there any better, or
0112     # additional, heuristic that should be applied?
0113     $is_binary = 0;
0114 
0115     while (read(FILE, $data, 65536) > 0) {
0116     if ($data =~ /\0/) {
0117         $is_binary = 1;
0118         last;
0119     }
0120     }
0121 
0122     if ($is_binary) {
0123     print STDERR "$name: $f: binary file\n";
0124     next;
0125     }
0126 
0127     seek(FILE, 0, 0);
0128 
0129     $in_bytes = 0;
0130     $out_bytes = 0;
0131     $blank_bytes = 0;
0132 
0133     @blanks = ();
0134     @lines  = ();
0135     $lineno = 0;
0136 
0137     while ( defined($line = <FILE>) ) {
0138     $lineno++;
0139     $in_bytes += length($line);
0140     $line =~ s/[ \t\r]*$//;     # Remove trailing spaces
0141     $line = clean_space_tabs($line);
0142 
0143     if ( $line eq "\n" ) {
0144         push(@blanks, $line);
0145         $blank_bytes += length($line);
0146     } else {
0147         push(@lines, @blanks);
0148         $out_bytes += $blank_bytes;
0149         push(@lines, $line);
0150         $out_bytes += length($line);
0151         @blanks = ();
0152         $blank_bytes = 0;
0153     }
0154 
0155     $l_width = strwidth($line);
0156     if ($max_width && $l_width > $max_width) {
0157         print STDERR
0158         "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
0159     }
0160     }
0161 
0162     # Any blanks at the end of the file are discarded
0163 
0164     if ($in_bytes != $out_bytes) {
0165     # Only write to the file if changed
0166     seek(FILE, 0, 0);
0167     print FILE @lines;
0168 
0169     if ( !defined($where = tell(FILE)) ||
0170          !truncate(FILE, $where) ) {
0171         die "$name: Failed to truncate modified file: $f: $!\n";
0172     }
0173     }
0174 
0175     close(FILE);
0176 }