in reply to Pulling all instances of a regex out
#!/usr/bin/perl
# Walk an HTML fragment token by token and collect, in document order,
# the src attribute of every <img> start tag and the raw content of every
# text token. Each collected item is printed on its own line wrapped in
# asterisks so that leading/trailing whitespace is visible.
use strict;
use warnings;

use HTML::TokeParser::Simple;

# Slurp the whole __DATA__ section in one read; $/ is localised so the
# record separator is restored as soon as the do-block ends.
my $html = do { local $/; <DATA> };

# The parser reads from a reference to the in-memory string. The error
# message deliberately does not interpolate the document itself (that
# would dump the entire HTML) nor $! (no system call is involved).
my $tp = HTML::TokeParser::Simple->new(\$html)
    or die "Couldn't create a parser for the embedded HTML";

my @results;
while (my $t = $tp->get_token) {
    if ($t->is_start_tag('img')) {
        # An <img> without a src attribute yields undef; skip it rather
        # than print an "uninitialized value" warning later.
        my $src = $t->get_attr('src');
        push @results, $src if defined $src;
    }
    elsif ($t->is_text) {
        # Text tokens are kept verbatim, so they still carry whatever
        # whitespace and newlines surround them in the markup.
        push @results, $t->as_is;
    }
}

print "*$_*\n" for @results;

__DATA__
<br><br>
<table width="100%" cellpadding="2" cellspacing="0" border="0">
  <tr>
    <td align="left" valign="bottom">
      <img src='http://images.tek-tips.com/items/image001.gif' alt='Image001' width='40' height='40' border='0'>
      Description of image here
    </td>
    <td align="right" valign="bottom"></td>
  </tr>
  <tr>
    <td align="left" valign="bottom">
      <img src='http://images.tek-tips.com/items/image002.gif' alt='Image002' width='40' height='40' border='0'>
      Description of image here
    </td>
    <td align="right" valign="bottom"> </td>
  </tr>
</table>
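The text tokens come with a fair bit of surrounding whitespace, which you will need to strip yourself (for example with `s/^\s+|\s+$//g`), skipping any token that is empty afterwards.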
|
|---|