#!/usr/bin/perl
#
# Download the gene table published at nexus.ugent.be/jeroen/bc.html and
# collect one hash record per table row in @gene_score.

use strict;
use warnings;

use HTML::TableContentParser;
use HTML::TokeParser::Simple;
use WWW::Mechanize;
use Data::Dumper;

# autocheck => 1 asks WWW::Mechanize to die on a failed request, so no
# explicit error check follows the get() call.
my $mech = WWW::Mechanize->new( autocheck => 1 );
$mech->get( 'http://nexus.ugent.be/jeroen/bc.html' );

# One hash reference per data row, in page order.
# NOTE(review): Data::Dumper is loaded but nothing visible here prints
# @gene_score -- the code that consumes it may lie outside this view.
my @gene_score;

my $table = HTML::TableContentParser->new()->parse( $mech->content() );

# Only the first table on the page is used; fall back to an empty row
# list when the page contains no table at all.
my $rows = $table->[0]{rows} || [];

# Row 0 is skipped (presumably the header row -- confirm against the
# page), which makes the row index double as the gene's rank.
for my $rank ( 1 .. $#{$rows} ) {
    my $cells = $rows->[$rank]{cells};

    push @gene_score, {
        Rank => $rank,

        # scalar() is essential: in the list context of this hash
        # constructor a failed lookup would otherwise contribute an
        # empty list and shift every following key/value pair by one.
        Ensembl_ID      => scalar( Get_Label( $cells->[0]{data} ) ),
        RefSeq_ID       => $cells->[1]{data},
        Gene_Symbol     => $cells->[2]{data},
        Band            => $cells->[3]{data},
        Gene_Name       => $cells->[4]{data},
        Probability     => $cells->[5]{data},
        Known_Phenotype => $cells->[6]{data},
    };
}

# Get_Label( $html )
#
# Extract the gene identifier from a fragment of HTML containing a link
# whose href ends in "gene=<ID>".
#
#   $html - HTML fragment (the raw content of a table cell).
#
# Returns the captured <ID> of the first start tag whose href matches.
# Returns nothing (undef in scalar context, the empty list in list
# context) when $html is undefined or no tag carries a matching href;
# call it through scalar() where exactly one value is required.
sub Get_Label {
    my $link = shift;

    return if !defined $link;

    my $p = HTML::TokeParser::Simple->new( \$link );

    while ( my $token = $p->get_token ) {
        next if !$token->is_start_tag;

        # Tags without an href (e.g. formatting wrappers around the
        # link) are skipped instead of being matched as undef.
        my $href = $token->get_attr('href');
        next if !defined $href;

        if ( $href =~ /gene=(\w+)$/ ) {
            return $1;
        }
    }

    return;
}