in reply to parsing html
I'm not too sure about your spec, but if the goal is to get the text within bold tags that are themselves within td tags, then something like this will get you started.
#!/usr/bin/perl
# Print the text found inside <b> elements that are themselves inside <td>
# elements, one piece of text per output line. The sample HTML is read from
# the __DATA__ section at the end of this script.
use warnings;
use strict;

use HTML::TokeParser::Simple;

# Slurp the whole __DATA__ section in one read.
my $html = do { local $/; <DATA> };

my $parser = HTML::TokeParser::Simple->new(string => $html);

# Parser state: are we currently inside a <td>, and inside a <b> within it?
my $in_td = 0;
my $in_b  = 0;

TOKEN:
while (my $token = $parser->get_token) {

    # A new cell always starts with no bold element open.
    if ($token->is_start_tag(q{td})) {
        $in_td = 1;
        $in_b  = 0;
        next TOKEN;
    }

    # Leaving a cell clears both flags, so a later <b> that sits outside any
    # table cell is not mistaken for cell content.
    if ($token->is_end_tag(q{td})) {
        $in_td = 0;
        $in_b  = 0;
        next TOKEN;
    }

    # Bold only counts when it opens inside a cell.
    if ($token->is_start_tag(q{b})) {
        $in_b = 1 if $in_td;
        next TOKEN;
    }

    if ($token->is_end_tag(q{b})) {
        $in_b = 0;
        next TOKEN;
    }

    next TOKEN unless $in_td and $in_b and $token->is_text;

    # Every text token inside the bold element is printed, so bold content
    # that is split up by nested markup is not truncated to its first piece.
    print $token->as_is, qq{\n};
}

__DATA__
<TR class="violet3">
<TD ><B>hsa-miR-107</B></TD>
<TD >17.1922</TD>
<TD >-21.47</TD>
<TD >2.119850e-02</TD>
<TD >2.097540e-02</TD>
<TD >6.191350e-04</TD>
<TD >106</TD>
<TD >127</TD>
<TD ><pre><FONT COLOR="#FFFFFF">a</FONT><FONT COLOR="#FFFFFF">c</FONT><FONT COLOR="#FFFFFF">u</FONT><FONT></FONT>
</TR>
<TR class="violet2">
<TD ><B>hsa-miR-103</B></TD>
<TD >17.1922</TD>
<TR class="violet3">
<TD ><B>hsa-miR-651</B></TD>
</TR>
<TR class="violet2">
<TD ><B>hsa-miR-320</B></TD>
Good luck!

The script prints:

hsa-miR-107
hsa-miR-103
hsa-miR-651
hsa-miR-320
|
|---|