Back to home page

OSCL-LXR

 
 

    


0001 #!/usr/bin/env perl
0002 # SPDX-License-Identifier: GPL-2.0
0003 #
0004 # Clean a text file -- or directory of text files -- of stealth whitespace.
0005 # WARNING: this can be a highly destructive operation.  Use with caution.
0006 #
0007 
0008 use warnings;
0009 use bytes;
0010 use File::Basename;
0011 
0012 # Default options
0013 $max_width = 79;
0014 
0015 # Clean up space-tab sequences, either by removing spaces or
0016 # replacing them with tabs.
0017 sub clean_space_tabs($)
0018 {
0019     no bytes;                   # Tab alignment depends on characters
0020 
0021     my($li) = @_;
0022     my($lo) = '';
0023     my $pos = 0;
0024     my $nsp = 0;
0025     my($i, $c);
0026 
0027     for ($i = 0; $i < length($li); $i++) {
0028         $c = substr($li, $i, 1);
0029         if ($c eq "\t") {
0030             my $npos = ($pos+$nsp+8) & ~7;
0031             my $ntab = ($npos >> 3) - ($pos >> 3);
0032             $lo .= "\t" x $ntab;
0033             $pos = $npos;
0034             $nsp = 0;
0035         } elsif ($c eq "\n" || $c eq "\r") {
0036             $lo .= " " x $nsp;
0037             $pos += $nsp;
0038             $nsp = 0;
0039             $lo .= $c;
0040             $pos = 0;
0041         } elsif ($c eq " ") {
0042             $nsp++;
0043         } else {
0044             $lo .= " " x $nsp;
0045             $pos += $nsp;
0046             $nsp = 0;
0047             $lo .= $c;
0048             $pos++;
0049         }
0050     }
0051     $lo .= " " x $nsp;
0052     return $lo;
0053 }
0054 
0055 # Compute the visual width of a string
0056 sub strwidth($) {
0057     no bytes;                   # Tab alignment depends on characters
0058 
0059     my($li) = @_;
0060     my($c, $i);
0061     my $pos = 0;
0062     my $mlen = 0;
0063 
0064     for ($i = 0; $i < length($li); $i++) {
0065         $c = substr($li,$i,1);
0066         if ($c eq "\t") {
0067             $pos = ($pos+8) & ~7;
0068         } elsif ($c eq "\n") {
0069             $mlen = $pos if ($pos > $mlen);
0070             $pos = 0;
0071         } else {
0072             $pos++;
0073         }
0074     }
0075 
0076     $mlen = $pos if ($pos > $mlen);
0077     return $mlen;
0078 }
0079 
0080 $name = basename($0);
0081 
0082 @files = ();
0083 
0084 while (defined($a = shift(@ARGV))) {
0085     if ($a =~ /^-/) {
0086         if ($a eq '-width' || $a eq '-w') {
0087             $max_width = shift(@ARGV)+0;
0088         } else {
0089             print STDERR "Usage: $name [-width #] files...\n";
0090             exit 1;
0091         }
0092     } else {
0093         push(@files, $a);
0094     }
0095 }
0096 
0097 foreach $f ( @files ) {
0098     print STDERR "$name: $f\n";
0099 
0100     if (! -f $f) {
0101         print STDERR "$f: not a file\n";
0102         next;
0103     }
0104 
0105     if (!open(FILE, '+<', $f)) {
0106         print STDERR "$name: Cannot open file: $f: $!\n";
0107         next;
0108     }
0109 
0110     binmode FILE;
0111 
0112     # First, verify that it is not a binary file; consider any file
0113     # with a zero byte to be a binary file.  Is there any better, or
0114     # additional, heuristic that should be applied?
0115     $is_binary = 0;
0116 
0117     while (read(FILE, $data, 65536) > 0) {
0118         if ($data =~ /\0/) {
0119             $is_binary = 1;
0120             last;
0121         }
0122     }
0123 
0124     if ($is_binary) {
0125         print STDERR "$name: $f: binary file\n";
0126         next;
0127     }
0128 
0129     seek(FILE, 0, 0);
0130 
0131     $in_bytes = 0;
0132     $out_bytes = 0;
0133     $blank_bytes = 0;
0134 
0135     @blanks = ();
0136     @lines  = ();
0137     $lineno = 0;
0138 
0139     while ( defined($line = <FILE>) ) {
0140         $lineno++;
0141         $in_bytes += length($line);
0142         $line =~ s/[ \t\r]*$//;         # Remove trailing spaces
0143         $line = clean_space_tabs($line);
0144 
0145         if ( $line eq "\n" ) {
0146             push(@blanks, $line);
0147             $blank_bytes += length($line);
0148         } else {
0149             push(@lines, @blanks);
0150             $out_bytes += $blank_bytes;
0151             push(@lines, $line);
0152             $out_bytes += length($line);
0153             @blanks = ();
0154             $blank_bytes = 0;
0155         }
0156 
0157         $l_width = strwidth($line);
0158         if ($max_width && $l_width > $max_width) {
0159             print STDERR
0160                 "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
0161         }
0162     }
0163 
0164     # Any blanks at the end of the file are discarded
0165 
0166     if ($in_bytes != $out_bytes) {
0167         # Only write to the file if changed
0168         seek(FILE, 0, 0);
0169         print FILE @lines;
0170 
0171         if ( !defined($where = tell(FILE)) ||
0172              !truncate(FILE, $where) ) {
0173             die "$name: Failed to truncate modified file: $f: $!\n";
0174         }
0175     }
0176 
0177     close(FILE);
0178 }