#!/usr/bin/perl
#
# Extract a single job record from the HTML supplied on the DATA handle and
# dump it with Data::Dumper.
#
# Fields collected while walking the token stream:
#   jobname    - text of <span class="jobname">
#   jobserial  - text of <span class="jobserial">
#   em         - array ref, one entry per <span name="em">
#   offices    - text of <span name="offices">
#   job_desc   - text of the first <blockquote>
#
# The record is stored when the first <blockquote> is reached; any later
# <blockquote> elements are ignored.

use strict;
use warnings;

use HTML::TokeParser::Simple;
use Data::Dumper;

my $parser = HTML::TokeParser::Simple->new(*DATA)
    or die "couldn't parse DATA: $!\n";

# <span class="..."> values whose text is stored as a plain scalar under a
# record key of the same name.
my %is_scalar_class = map { $_ => 1 } qw(jobname jobserial);

my @records;
my %record;
my $seen_description;    # true once the first <blockquote> has been consumed

while (my $token = $parser->get_token) {
    if ($token->is_start_tag('span')) {
        # Fetch each attribute once; a missing attribute becomes '' so the
        # string comparisons below never see undef.
        my $class = $token->get_attr('class') || '';
        my $name  = $token->get_attr('name')  || '';

        if ($is_scalar_class{$class}) {
            $record{$class} = $parser->get_trimmed_text('/span');
        }
        elsif ($name eq 'em') {
            # There may be several of these spans; keep them all, in order.
            push @{ $record{em} }, $parser->get_trimmed_text('/span');
        }
        elsif ($name eq 'offices') {
            $record{offices} = $parser->get_trimmed_text('/span');
        }
    }
    elsif ($token->is_start_tag('blockquote')) {
        # Only the first <blockquote> carries the description.
        next if $seen_description;

        # Stop at the closing tag, or at a following opening <blockquote>,
        # whichever comes first.  Naming only the opening tag would make the
        # read run on through the rest of the document whenever no later
        # <blockquote> exists.
        $record{job_desc}
            = $parser->get_trimmed_text('/blockquote', 'blockquote');

        push @records, {%record};    # store a copy, then start afresh
        %record           = ();
        $seen_description = 1;
    }
}

print Dumper \@records;

__DATA__

Accounting Assistant, Level 2 (19203)
Current members:
Plow, Elliot Wang, Susan
Huston

Job descriptions here. This block quoted text contains a job description and it what I am really looking to recover.
Go to the top of this page.
Check for open positions now!
#### ---------- Capture Output ---------- > "c:\perl\bin\perl.exe" _new.pl $VAR1 = [ { 'em' => [ 'Plow, Elliot', 'Wang, Susan' ], 'job_desc' => 'Job descriptions here. This block quoted text contains a job description and it what I am really looking to recover.', 'offices' => 'Huston', 'jobserial' => '(19203)', 'jobname' => 'Accounting Assistant, Level 2' } ]; > Terminated with exit code 0.