#! /usr/bin/perl #fetch the webpage my $url = 'http://www.foo'; use LWP::Simple; my $content = get $url; die "Couldn't get $url" unless defined $content; print $content; #strip the tags use HTML::Strip; my $hs = HTML::Strip->new(); my $clean_text = $hs->parse( $content ); $hs->eof; #write to file $append = 0; if ($append) { open(MYOUTFILE, ">clean_text"); #open for write, overwrite } else { open(MYOUTFILE, ">>clean_text"); #open for write, append } #read the file into an array open(MYINPUTFILE, "<clean_text"); # open for input my(@lines) = <MYINPUTFILE>; # read file into list #so far so good, but check below for the troubles. my $search = 'foo'; my @where = grep { $lines[$_] eq $search } 0 .. $#lines; print @where;
will return nothing at all.
The stripped file looks something like below.
    464
      02
  18/03/07
        ST / "Turf" / "B+2"
Edit: g0n - code tags
In reply to html into an array by monkeybus
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |