use strict;
use warnings;
# Name of the external file that holds the review definitions. Only used
# when the file-reading lines further down are uncommented.
my $DATA_FILE = q{ReviewUpdatesMarch2007};
# Field names, in the order the fields appear on each tab-separated line.
my @DATA_FIELDS = ( 'owner', 'new_date', 'url' );
# Validation routines for each field, keyed by field name.
# Each routine takes a single field value and returns a true value when
# the value is acceptable and a false value otherwise. The result is
# forced to scalar so exactly one value comes back even in list context
# (a bare failed match would return an empty list there).
my %VALIDATE_FIELDS;
{    # Limit scope of disabled warnings.
    # With this warning off, an undefined value (e.g. a field missing
    # from an input line) is matched as an empty string and simply fails
    # validation instead of also emitting an "uninitialized" warning.
    no warnings 'uninitialized';

    %VALIDATE_FIELDS = (

        # owner must be one or more word characters and/or whitespace
        owner => sub {
            my $owner = shift;
            return scalar( $owner =~ m{\A[\w\s]+\z} );
        },

        # new_date must be 6 digits broken into pairs by slashes or dashes
        new_date => sub {
            my $date = shift;

            # \A and \z (rather than ^ and $) so that a value with a
            # trailing newline is rejected.
            return scalar( $date =~ m{\A\d\d[/-]\d\d[/-]\d\d\z} );
        },

        # url must consist only of word characters or :/&?+#
        url => sub {
            my $url = shift;
            return scalar( $url =~ m{\A[\w:/&?+#]+\z} );
        },

        # NOTE: the url check is only a loose character whitelist, not
        # real URL validation. A dedicated CPAN module would be a better
        # choice if strict validation is needed.
    );
}
my @reviews;    # Validated review records, one hash reference per input line

# Input comes from the __DATA__ section by default. To read from
# $DATA_FILE instead, uncomment the three "file input" snippets below and
# delete the while line that reads from <DATA>.

# file input (1 of 3): open the data file
#open( my $fh, '<', $DATA_FILE )
#    or die "Error opening data file $DATA_FILE - $!";

# Parse each line into a hash keyed by @DATA_FIELDS, validate every field,
# and store the record in @reviews. A line with any invalid field is
# reported with a warning and skipped.
ITEM:
# file input (2 of 3): read from the data file
#while ( defined( my $line = <$fh> ) )
while ( defined( my $line = <DATA> ) )    # Delete this to pull data from file
{
    # Strip the line ending so it does not end up inside the last field.
    chomp $line;

    # Fields are tab-separated; fields missing from a short line are left
    # undefined here.
    my %review;
    @review{@DATA_FIELDS} = split( /\t/, $line );

    foreach my $field (@DATA_FIELDS) {
        unless ( $VALIDATE_FIELDS{$field}->( $review{$field} ) ) {
            warn "Invalid data in field '$field' from line '$line'\n";

            # Skip bad data
            next ITEM;
        }
    }

    push @reviews, \%review;
}

# file input (3 of 3): close the data file
#close $fh
#    or die "Error closing $DATA_FILE - $!\n";

# Uncomment to dump your data table for debugging purposes.
#use Data::Dumper;
#print Dumper \@reviews;
# Walk the stored reviews: print each record's url on its own line, then
# pass that url to read_file().
for my $entry (@reviews) {
    my $link = $entry->{url};
    print "$link\n";

    # The result of read_file() is captured but not used further here.
    my $contents = read_file($link);
}
# read_file( $url )
#
# Placeholder for the code that fetches or reads the content for a review
# url. It currently ignores its argument and does no work.
#
# Returns nothing: undef in scalar context, an empty list in list context.
sub read_file {
    # TODO: do stuff here. The original author suspected that the real
    # problem lies in whatever this routine ends up doing.
    return;
}
__DATA__
Good Owner 12/12/23 good_url
Bad!!!Owner 22/22/22 good_url
Good Owner2 BAD DATE good_url
Good Owner3 12/12/23 bad url
Good Owner4 12/12/23 good_url
|