in reply to HTML Parsing /Regex Qstn
# -- Roman
#
# Collect, for every <div> in the document, the link marked with
# class="titlefield" and the text of the element marked with
# class="datefield", then dump the result.
#
# Resulting structure:
#   %output = ( $href => [ $title, $date, ... ], ... )
# where $date is the empty string when the <div> has no datefield element.

use strict;
use warnings;

use HTML::TreeBuilder;
use Data::Dump qw{dump};

my $tree = HTML::TreeBuilder->new_from_file("your_html_file");

my %output = ();

DIV:
for my $div ($tree->find("div")) {
    # Only <div>s that contain (or are) a titlefield element are of interest.
    my $titlefield = $div->look_down(class => "titlefield")
        or next DIV;

    # The href is the hash key, so an element without one cannot be stored;
    # skipping it avoids an "uninitialized value" warning and a bogus ""
    # entry that every href-less element would overwrite.
    my $href = $titlefield->attr("href");
    next DIV unless defined $href;

    # The date is optional: fall back to "" so the slot is always present.
    my $datefield = $div->look_down(class => "datefield");
    my $date      = $datefield ? $datefield->as_text() : "";

    $output{$href} = [ $titlefield->attr("title"), $date ];

    # ... push further fields onto @{ $output{$href} } here in the same way
}

# Pass a reference instead of copying the hash into an anonymous one; the
# explicit parentheses also keep the braces from being read as a block.
dump(\%output);

# Release the parse tree explicitly (HTML::Element recommends this when the
# tree is no longer needed -- harmless on versions that free it themselves).
$tree->delete;
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: HTML Parsing /Regex Qstn
by sri1230 (Novice) on Jan 21, 2010 at 16:53 UTC | |
by planetscape (Chancellor) on Jan 22, 2010 at 01:02 UTC | |
by sri1230 (Novice) on Jan 21, 2010 at 17:06 UTC | |
by bobr (Monk) on Jan 21, 2010 at 17:38 UTC | |
by sri1230 (Novice) on Jan 21, 2010 at 21:47 UTC |