#!/usr/bin/perl
use warnings;
use strict;
use LWP::Simple;
use Spreadsheet::WriteExcel;
use Tk;
my $excelRow = 0;
my $URL;
my @people;
my $workbook = Spreadsheet::WriteExcel->new("names.xls");
my $worksheet = $workbook->add_worksheet("Names");
my $mw = MainWindow->new();
$mw->title("Data Extractor");
#Instructions
$mw->Label(-text => "Enter a URL, and hit Extract.")->grid(-row => 0,
-column => 0,
-columnspan => 3);
#Interface
$mw->Label(-text => "URL:")->grid(-row => 1,
-column => 0,
-sticky => 'w');
$mw->Entry(-textvariable => \$URL)->grid(-row => 1,
-column => 1,
-columnspan => 3,
-ipadx => '30');
$mw->Button(-text => "Extract",
-command => \&getData)->grid(-row => 2,
-column => 0,
-columnspan => 3,
-pady => '10');
MainLoop();
sub getData() {
my @data = split /\n/, get($URL);
foreach my $line (@data) {
if ($line =~ /
/, $line;
foreach my $person (@people) {
if ($person =~ /mailto:/) {
&extractData($person);
}
}
}
}
foreach my $person (@people) {
my @personData;
push @personData, $person->{"name"};
push @personData, $person->{"email"};
push @personData, $person->{"phone"};
$worksheet->write_row($excelRow, 0, \@personData) or die "Write failed: $!\n";
$excelRow++;
}
@people = undef;
}
sub extractData() {
my $person = shift @_;
my ($name, $email, $phone);
if ($person =~ /(.*)<\/strong>/) {
$name = $1;
}
if ($person =~ /\"mailto:(.*)\">\1/) {
$email = convertHTMLEmail($1);
}
if ($person =~ /(\(\d{3}\) ?\d{3}-?\d{4})/) {
$phone = $1;
}
push @people, {"name" => $name, "email" => $email, "phone" => $phone};
}
sub convertHTMLEmail() {
my $emailString = shift @_;
$emailString =~ s///g;
my @chars = split /;/, $emailString;
my @convertedChars;
foreach (@chars) {
push @convertedChars, chr($_);
}
return join "", @convertedChars;
}
|