#!/usr/bin/perl
####################################################
# InvClean.pl : Raw Scanned File Processing program
# Version 1.0
# Written by Robb Pickinpaugh
# 01/31/2002
# for use on Windows NT
####################################################
#
# Usage: InvClean.pl <raw_file>
#
# Reads <raw_file> line by line and writes the cleaned lines to
# "<raw_file>.clean", then reports how many lines were read and written.
####################################################
use strict;
use warnings;

# Fail early with a usage message instead of trying to open an empty name.
die "Usage: $0 <raw_file>\n"
    unless defined( $ARGV[0] ) && length( $ARGV[0] );

my @args = (
    $ARGV[0],            # file to process
    "$ARGV[0].clean",    # processed file
    16,                  # numeric value of the "usual"
                         # line starting character
    16,                  # "usual" starting length of lines
                         # starting with $correct_start_char
    13,                  # correct length of lines after they
                         # have been stripped
    14                   # length of lines that do not include
                         # the extra stop and start characters
);

my ( $raw_file_length, $clean_file_length ) = process_file(@args);

print "$raw_file_length lines read from $args[0]\n";
print "$clean_file_length lines written to $args[1]\n";
print "\a";              # audible bell to signal completion

exit(0);

# process_file( $file_name, $clean_file_name, $correct_start_char,
#               $correct_start_length, $correct_clean_length, $typed_length )
#
# Copies the usable lines of $file_name into $clean_file_name.
#
# Parameters:
#   $file_name            - path of the raw file to read
#   $clean_file_name      - path of the cleaned file to (over)write
#   $correct_start_char   - ordinal of the expected leading character
#   $correct_start_length - required total length (line terminator
#                           included) of a line that begins with
#                           $correct_start_char
#   $correct_clean_length - number of characters kept from a line that
#                           does not begin with $correct_start_char
#   $typed_length         - total length of a line that is already clean
#                           and is copied through unchanged
#
# Returns the list ( lines_read, lines_written ); both are 0 rather than
# undef when nothing was read or written.
#
# Dies if either file cannot be opened or closed.
sub process_file {
    my (
        $file_name,            $clean_file_name,
        $correct_start_char,   $correct_start_length,
        $correct_clean_length, $typed_length
    ) = @_;

    # Three-argument open with lexical handles: the file name can never be
    # interpreted as an open mode, and the handles are scoped to this sub.
    open my $raw_fh, '<', $file_name
        or die "Cannot open $file_name: $!";
    open my $clean_fh, '>', $clean_file_name
        or die "Cannot open $clean_file_name: $!";

    my $raw_lines   = 0;
    my $clean_lines = 0;

    LINE:
    while ( my $line = <$raw_fh> ) {

        # Count every line as soon as it is read, so that lines skipped
        # below are still included in the "lines read" total.
        $raw_lines++;

        my $line_length = length $line;
        my $start_char  = ord $line;      # ordinal of the first character

        if ( $start_char == $correct_start_char ) {

            # Lines with the start character must have exactly the
            # expected length; anything else is discarded.
            next LINE if $line_length != $correct_start_length;

            # Drop the first character and the last two characters
            # (presumably the stop character and the newline).
            print {$clean_fh} substr( $line, 1, -2 ), "\n";
            $clean_lines++;
        }
        elsif ( $line_length == $typed_length ) {

            # Already-clean line: copy it through untouched. This must be
            # tested before the "longer than clean length" case, which
            # would otherwise also match and truncate the line.
            print {$clean_fh} $line;
            $clean_lines++;
        }
        elsif ( $line_length > $correct_clean_length ) {

            # Drop the last two characters, then keep only the trailing
            # $correct_clean_length characters of what remains.
            my $trimmed = substr( $line, 0, -2 );
            print {$clean_fh} substr( $trimmed, -$correct_clean_length ),
                "\n";
            $clean_lines++;
        }

        # Any other line is silently dropped.
    }

    close $raw_fh   or die "cannot close $file_name: $!";
    close $clean_fh or die "cannot close $clean_file_name: $!";

    return ( $raw_lines, $clean_lines );
}

__END__