#!/usr/bin/perl use warnings; use strict; use LWP::Simple; use Spreadsheet::WriteExcel; use Tk; my $excelRow = 0; my $URL; my @people; my $workbook = Spreadsheet::WriteExcel->new("names.xls"); my $worksheet = $workbook->add_worksheet("Names"); my $mw = MainWindow->new(); $mw->title("Data Extractor"); #Instructions $mw->Label(-text => "Enter a URL, and hit Extract.")->grid(-row => 0, -column => 0, -columnspan => 3); #Interface $mw->Label(-text => "URL:")->grid(-row => 1, -column => 0, -sticky => 'w'); $mw->Entry(-textvariable => \$URL)->grid(-row => 1, -column => 1, -columnspan => 3, -ipadx => '30'); $mw->Button(-text => "Extract", -command => \&getData)->grid(-row => 2, -column => 0, -columnspan => 3, -pady => '10'); MainLoop(); sub getData() { my @data = split /\n/, get($URL); foreach my $line (@data) { if ($line =~ //, $line; foreach my $person (@people) { if ($person =~ /mailto:/) { &extractData($person); } } } } foreach my $person (@people) { my @personData; push @personData, $person->{"name"}; push @personData, $person->{"email"}; push @personData, $person->{"phone"}; $worksheet->write_row($excelRow, 0, \@personData) or die "Write failed: $!\n"; $excelRow++; } @people = undef; } sub extractData() { my $person = shift @_; my ($name, $email, $phone); if ($person =~ /(.*)<\/strong>/) { $name = $1; } if ($person =~ /\"mailto:(.*)\">\1/) { $email = convertHTMLEmail($1); } if ($person =~ /(\(\d{3}\) ?\d{3}-?\d{4})/) { $phone = $1; } push @people, {"name" => $name, "email" => $email, "phone" => $phone}; } sub convertHTMLEmail() { my $emailString = shift @_; $emailString =~ s/&#//g; my @chars = split /;/, $emailString; my @convertedChars; foreach (@chars) { push @convertedChars, chr($_); } return join "", @convertedChars; }