in reply to ugly regex question
You should also think in more general terms: consider parsing the HTML instead of pattern-matching it. What you're looking for is the text between a > and the following </.
# C:\dev\loose\html.treebuilder3.pl
#
# Two ways to extract the cell text from an HTML table row:
#   1. parse the markup with HTML::TreeBuilder and walk each <tr>;
#   2. capture whatever sits between a '>' and the next '</' with a regex.
# The output of both approaches is listed after __END__.
use strict;
use warnings;

use HTML::TreeBuilder;

# Sample row, kept on one physical line so the markup stays intact.
my $html = q~
<tr align=right><td>/export/home3</td><td>308218</td><td>307200</td><td>308224</td><td>7.0 days</td><td>0</td><td>0</td><td>0</td><td>-</td></tr>
~;

# Approach 1: build a parse tree; eof() tells the parser the input is complete.
my $t = HTML::TreeBuilder->new();
$t->parse($html);
$t->eof;

for my $row ( $t->find_by_tag_name('tr') ) {
    # content_list() returns the children as a plain (possibly empty) list.
    # Element children are objects; bare text children are plain strings.
    my @cells = map { ref $_ ? $_->as_text : $_ } $row->content_list;

    # The newline is added after the join, so a row no longer ends in a
    # dangling ' | ' separator.
    print join( ' | ', @cells ), "\n";
}

# Free the tree explicitly: needed by older HTML::TreeBuilder releases,
# harmless with newer ones.
$t->delete;

# Approach 2: list-context /g match returns every captured cell text; warn
# reports each one on STDERR together with the script name and line number.
warn $_ for $html =~ m{> ( [^>]+ ) </}gx;

__END__
/export/home3 | 308218 | 307200 | 308224 | 7.0 days | 0 | 0 | 0 | -
/export/home3 at html.treebuilder3.pl line 38.
308218 at html.treebuilder3.pl line 38.
307200 at html.treebuilder3.pl line 38.
308224 at html.treebuilder3.pl line 38.
7.0 days at html.treebuilder3.pl line 38.
0 at html.treebuilder3.pl line 38.
0 at html.treebuilder3.pl line 38.
0 at html.treebuilder3.pl line 38.
- at html.treebuilder3.pl line 38.
|
|---|