use strict;
use warnings;

# Stage 1: scan Page1.html .. Page10.html, pull the text that precedes a
# closing </span tag on each line, expand English contractions in it, and
# write one cleaned quote per line to Quotes.txt.
my $quotes = "Quotes.txt";
open(my $quotes_out, '>', $quotes) or die("Could not open $quotes: $!");

# Return a copy of $text with HTML apostrophe/quote entities decoded and
# common English contractions expanded.
#
# Order matters:
#   1. Entities are decoded first, so an apostrophe written as &#39; still
#      takes part in the contraction rules below.
#   2. Irregular contractions (won't, can't, let's, ...) are handled before
#      the generic suffix rules; otherwise "won't" would be rewritten to
#      "wo not", "can't" to "ca not" and "let's" to "let is".
#
# Known limitation (unchanged from the original rules): the generic 's rule
# also rewrites possessives, e.g. "world's" becomes "world is".
sub _expand_contractions {
    my ($text) = @_;

    # NOTE(review): &#34; is the double-quote entity, yet it has always been
    # mapped to a single quote here. Kept as-is; confirm that is intended.
    $text =~ s/\&\#34;/'/ig;
    $text =~ s/\&\#39;/'/ig;

    # Irregular forms that the generic suffix rules would get wrong.
    $text =~ s/\bwon't\b/will not/ig;
    $text =~ s/\bcan't\b/cannot/ig;
    $text =~ s/\blet's\b/let us/ig;
    $text =~ s/\bI'm\b/I am/ig;
    $text =~ s/\bI've\b/I have/ig;

    # Generic suffix rules. "it's" and "lady's" need no rule of their own:
    # the 's rule already turns them into "it is" / "lady is".
    $text =~ s/(\w+?)'re/$1 are/ig;
    $text =~ s/(\w+?)n't/$1 not/ig;
    $text =~ s/(\w+?)'ll/$1 will/ig;
    $text =~ s/(\w+?)'s/$1 is/ig;

    return $text;
}

my $page_num = 1;
while ($page_num <= 10) {
    my $htmlpages = "Page$page_num.html";
    open(my $page_in, '<', $htmlpages)
        or die("Could not open $htmlpages: $!");

    while (my $line = <$page_in>) {
        # NOTE(review): the pattern has no opening-tag anchor, so it captures
        # everything on the line before the first "</span". If the quote is
        # meant to be only the text inside a specific <span ...>, the opening
        # tag needs to be added to the pattern - confirm against the pages.
        if ($line =~ m/(.+?)<\/span/i) {
            print {$quotes_out} _expand_contractions($1), "\n";
        }
    }

    close($page_in);
    $page_num = $page_num + 1;
}

# Buffered write errors only surface at close, so check it.
close($quotes_out) or die("Could not close $quotes: $!");
# Stage 2: copy Quotes.txt to WordCount.txt with every stop word listed in
# stopwords.txt removed (whole-word, case-insensitive).
#
# Every quote line is written out, including lines that contain no stop
# words, and every stop word is applied to every line.
my $quotes_0  = "WordCount.txt";
my $quotes_1  = "Quotes.txt";
my $stopwords = "stopwords.txt";

# Load the stop-word list.
# NOTE(review): assumes one stop word per line - confirm against the file.
open(my $words_in, '<', $stopwords) or die("Could not open $stopwords: $!");
my @stop_terms;
while (my $stop = <$words_in>) {
    $stop =~ s/\A\s+|\s+\z//g;    # strip newline / CR / padding
    next if $stop eq '';
    push @stop_terms, $stop;
}
close($words_in);

# Build one alternation for all stop words. quotemeta keeps characters such
# as "." or "+" in a stop word from being read as regex syntax; longest-first
# ordering prefers the longer word when one stop word is a prefix of another.
my $stop_re;
if (@stop_terms) {
    my $alternation = join '|',
        map  { quotemeta }
        sort { length($b) <=> length($a) } @stop_terms;
    $stop_re = qr/\b(?:$alternation)\b/i;
}

# Open the input before the output so that a missing Quotes.txt does not
# leave behind a freshly truncated WordCount.txt.
open(my $old_in, '<', $quotes_1) or die("Could not open $quotes_1: $!");
open(my $filtered_out, '>', $quotes_0)
    or die("Could not open $quotes_0: $!");

while (my $line1 = <$old_in>) {
    chomp $line1;                 # avoid emitting a doubled newline
    $line1 =~ s/$stop_re//g if defined $stop_re;
    print {$filtered_out} "$line1\n";
}

close($old_in);
# Buffered write errors only surface at close, so check it.
close($filtered_out) or die("Could not close $quotes_0: $!");