use strict;
use warnings;

# Two-stage quote-processing script.
#
# Stage 1: read Page1.html .. Page10.html, pull the text that precedes a
#          closing "</span" on each line, expand English contractions and
#          write one quote per line to Quotes.txt.
# Stage 2: read Quotes.txt, remove every word listed in stopwords.txt and
#          write the filtered lines to WordCount.txt.
#
# All file names are relative to the current working directory. The script
# dies with the OS error message if any file cannot be opened or if a
# written file cannot be flushed on close.

my $quotes_file    = 'Quotes.txt';
my $wordcount_file = 'WordCount.txt';
my $stopwords_file = 'stopwords.txt';
my $last_page      = 10;

# Expand common English contractions in $text and return the new string.
#
# HTML entities for quote characters are decoded first so that an encoded
# apostrophe ("I&#39;m") is seen by the contraction rules. Irregular
# contractions are handled before the generic suffix rules; otherwise the
# generic "n't" rule would turn "won't" into "wo not" and "can't" into
# "ca not", and the generic "'s" rule would turn "let's" into "let is".
sub expand_contractions {
    my ($text) = @_;

    # Entity decoding. Both entities are mapped to an apostrophe, exactly as
    # the previous version of this script did.
    # NOTE(review): &#34; is the double-quote character; confirm that
    # mapping it to an apostrophe is intended.
    $text =~ s/&#34;/'/g;
    $text =~ s/&#39;/'/g;

    # Irregular forms: must run before the generic suffix rules below.
    $text =~ s/\bI'm\b/I am/ig;
    $text =~ s/\bI've\b/I have/ig;
    $text =~ s/\bwon't\b/will not/ig;
    $text =~ s/\bcan't\b/cannot/ig;
    $text =~ s/\blet's\b/let us/ig;

    # Generic suffix rules. "it's" and "lady's" need no rule of their own:
    # the "'s" rule already yields "it is" and "lady is".
    # NOTE(review): the "'s" rule also rewrites possessives ("John's book"
    # becomes "John is book"); this matches the previous behaviour.
    $text =~ s/(\w+?)'re/$1 are/ig;
    $text =~ s/(\w+?)'ll/$1 will/ig;
    $text =~ s/(\w+?)n't/$1 not/ig;
    $text =~ s/(\w+?)'s/$1 is/ig;

    return $text;
}

# ---------------------------------------------------------------------------
# Stage 1: extract quotes from the HTML pages.
# ---------------------------------------------------------------------------
open(my $quotes_out, '>', $quotes_file)
    or die("Could not open $quotes_file: $!");

for my $page_num (1 .. $last_page) {
    my $page_file = "Page$page_num.html";
    open(my $page_in, '<', $page_file)
        or die("Could not open $page_file: $!");

    while (my $line = <$page_in>) {
        # Capture everything on the line up to the first closing span tag.
        # NOTE(review): the pattern may originally have been anchored on an
        # opening tag before the capture group; confirm against real pages.
        if ($line =~ m/(.+?)<\/span/i) {
            print {$quotes_out} expand_contractions($1), "\n";
        }
    }

    close($page_in);
}

close($quotes_out)
    or die("Could not close $quotes_file: $!");

# ---------------------------------------------------------------------------
# Stage 2: strip stop words from the extracted quotes.
# ---------------------------------------------------------------------------

# Load the complete stop-word list up front so that every quote is checked
# against every stop word (pairing one stop word with one quote line would
# filter each quote against a single word and drop surplus quotes).
# Assumes one stop word per line -- TODO confirm the stopwords.txt format.
open(my $stop_in, '<', $stopwords_file)
    or die("Could not open $stopwords_file: $!");

my @stop_patterns;
while (my $stop = <$stop_in>) {
    $stop =~ s/^\s+|\s+$//g;    # trim, including the line terminator
    next if $stop eq '';

    # quotemeta: a stop word is literal text, never regex syntax.
    push @stop_patterns, quotemeta($stop);
}
close($stop_in);

# One precompiled alternation; longer words first so that a short stop word
# cannot pre-empt a longer one that starts with the same characters.
my $stop_re;
if (@stop_patterns) {
    my $alternation = join '|',
        sort { length($b) <=> length($a) } @stop_patterns;
    $stop_re = qr/\b(?:$alternation)\b/i;
}

open(my $quotes_in, '<', $quotes_file)
    or die("Could not open $quotes_file: $!");
open(my $filtered_out, '>', $wordcount_file)
    or die("Could not open $wordcount_file: $!");

while (my $quote = <$quotes_in>) {
    chomp $quote;    # avoid emitting a blank line after every quote
    $quote =~ s/$stop_re//g if defined $stop_re;
    print {$filtered_out} "$quote\n";
}

close($quotes_in);
close($filtered_out)
    or die("Could not close $wordcount_file: $!");