comment on

#!/usr/bin/perl -w

# Reads a report file one byte at a time, treating each form feed (\x0c) as
# the end of a page.  For every page with at least two lines, the name and
# address fields are cut from fixed columns of lines 59-62 (array indices
# 58-61) and handed to format(), which writes one record to the output file.
#
# Usage: <script> INPUT_FILE OUTPUT_FILE

use strict;
use warnings;

my $records_out = 0;    # Incremented by format() for every record written

# Did they not tell us what to use?
if (@ARGV != 2) {
    die("$0 - No file to process or output.\n");
}

my ($in_file, $out_file) = @ARGV;

# Get total size, in bytes, of file for percentage counter.  Falls back to 0
# (percentage display disabled) when stat() reports no size.
my $total_bytes = (stat($in_file))[7] || 0;

$| = 1;    # Autoflush output so the progress counter shows up immediately

# The bareword handles IN and OUT are kept on purpose: format() prints to OUT.
open(IN, '<', $in_file) or die("$0 - Cannot open $ARGV[0].\n");
open(OUT, '>', $out_file)
    or die("$0 - Cannot open $out_file for writing: $!\n");

print "Dumping Account holder name and Account number from $in_file...\n";
print "Outputting to $out_file\n";

my $read_bytes    = 0;
my $page_contents = '';
my $read_pages    = 1;
my $shown_percent = '';    # Last percentage string actually printed

for (;;) {
    my $got = read(IN, my $char, 1);
    die("$0 - Error reading $in_file: $!\n") unless defined $got;
    last if $got == 0;    # End of file
    $read_bytes++;

    if ($char eq "\x0c") {
        my @lines = split(/\n/, $page_contents);    # Split lines up

        # NOTE(review): a page with fewer than two lines is skipped without
        # clearing $page_contents, so its text (and not the form feed) stays
        # at the front of the next page -- confirm this is intended.
        next if @lines < 2;

        # Get name, address, city, state & zipcode from page
        my $name  = substr($lines[58], 5, 30);
        my $addr  = substr($lines[59], 5, 30);
        my $addr2 = substr($lines[60], 5, 30);
        my ($city, $state) = split(/\,/, substr($lines[61], 5, 20));
        my $zip   = substr($lines[61], 26, 5);

        $addr  = normalise($addr);
        $addr2 = normalise($addr2);
        $city  = normalise($city);
        $state = normalise($state);
        $zip   = normalise($zip);

        # Keep the & call form: "format" is also a Perl keyword (report
        # formats), so the sigil makes this an unambiguous sub call.
        &format($name, $addr, $addr2, $city, $state, $zip);
        $read_pages++;
        $page_contents = '';
    }
    $page_contents .= $char;    # Push char onto page (including the form feed)

    # Do percentage display.  Only write when the two-decimal value changes;
    # with autoflush on, printing for every byte costs one write per byte.
    if ($total_bytes) {
        my $percent = sprintf("%.2f", $read_bytes / $total_bytes * 100);
        if ($percent ne $shown_percent) {
            print "$percent%\r";
            $shown_percent = $percent;
        }
    }
}

# NOTE(review): the counter starts at 1 and is bumped again here, so the
# reported total is higher than the number of pages passed to format().
# Text after the last form feed is counted here but never processed.
$read_pages++;
close(IN);
close(OUT) or die("$0 - Error closing $out_file: $!\n");

print "\n-> Read $read_pages pages, and dumped $records_out records out.\n";
exit(0);


# Format data into ANCOA import standard
#
# Builds one record from the six address fields and prints it to the OUT
# filehandle with no trailing newline, then bumps the file-level
# $records_out counter.  The record is the literal type "1" followed by each
# field padded (by pad()) to its column width, and a 342-column blank filler.
sub format {
    my ($name, $addr, $addr2, $city, $state, $zip) = @_;

    # Every column as [ value, width ], in output order.
    my @columns = (
        [ $name,  35 ],
        [ $addr,  30 ],
        [ $addr2, 30 ],
        [ $city,  30 ],
        [ $state, 2 ],
        [ $zip,   5 ],
        [ " ",    342 ],    # Empty Optional Customer Info
    );

    # Record Type "1" leads the record.
    my $record = join('', "1", map { pad($_->[0], $_->[1]) } @columns);

    print OUT $record;
    $records_out++;
}

# Pad out with trailing spaces as necessary.
#
#   $data - value to pad; undef is treated as the empty string
#   $len  - minimum width of the result
#
# Returns $data followed by enough spaces to make it at least $len characters
# long.  A value that is already $len characters or longer is returned
# unchanged; nothing is ever truncated.
sub pad {
    my ($data, $len) = @_;

    # normalise() hands undef straight back, so a missing field can reach
    # this point; padding it as an empty string avoids uninitialized-value
    # warnings and always yields a defined result.
    $data = '' unless defined $data;

    # Append all missing spaces at once instead of one per loop iteration.
    my $missing = $len - length($data);
    $data .= ' ' x $missing if $missing > 0;

    return $data;
}

# Get rid of leading/trailing spaces.
#
# Only the space character (" ") is stripped; tabs, newlines and any other
# characters are left where they are.  A string made up entirely of spaces
# becomes the empty string, and undef is handed back unchanged.
sub normalise {
    my ($value) = @_;

    return $value unless defined $value;

    $value =~ s/\A +//;    # leading spaces
    $value =~ s/ +\z//;    # trailing spaces

    return $value;
}
[download]

In reply to Re: Perl scripts slowing down towards end of file processing by JPaul
in thread Perl scripts slowing down towards end of file processing by JPaul

Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!

Titles consisting of a single word are discouraged, and in most cases are disallowed outright.

Read Where should I post X? if you're not absolutely sure you're posting in the right place.

Please read these before you post! —

Posts may use any of the Perl Monks Approved HTML tags:

a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, details, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, summary, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr

You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)

	For:		Use:
	&		`&amp;`
	<		`&lt;`
	>		`&gt;`
	[		`&#91;`
	]		`&#93;`

Link using PerlMonks shortcuts! What shortcuts can I use for linking?

See Writeup Formatting Tips and other pages linked from there for more info.