in reply to Re: html parsing/regex
in thread html parsing/regex
I get: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: NAME: Text: Delay: So I think I misused a part of your script incorrectly because I'm not getting errors or anything. Did I mess up anywhere with
Sorry I keep bugging you, I promise this'll be the last time (I think I'll give up for a while if nothing else works ((note to self: this is why you stopped using HTML:: modules in the first place)) ). Thanks for your help!use strict; use CGI qw/:standard/; use HTML::Tree; use LWP::Simple; print header, start_html('test printing'); #my $count; #until ($count eq "5") { #$count++; my $funky = "http://www.allpoetry.com/chat//page=1"; my $content = get($funky); my $tree = HTML::Tree->new(); $tree->parse($content); # retrieve the text and split into lines my @lines = split "<br>", $tree->as_text; local $/; my @good_lines; my $good_lines; for my $lines (@lines) { $lines =~ s/\)/\)<br>/g; while($lines =~ m/Next Chatter \>(.*?)\< Previous Chatter/gs){ $good_lines = $1; push @good_lines,$good_lines; } foreach (@good_lines){ my @lines = split /<br>/; foreach (@lines){ next unless $_; /([^:]+): (.+) \((\d+) minutes ago\)/; my( $name, $text, $delay ) = ( $1, $2, $3 ); print "NAME:$name\nText:$text\nDelay:$delay\n\n"; } } }
|
|---|