# Parse candidate records from the files named on the command line (or from
# STDIN when none are given).
#
# Three kinds of input line are recognised, tested in this order:
#   CANDIDATE <digits>      sets the current candidate id
#   DNA ?Note? <text>       (? = any one character) <text> is scanned for
#                           bracketed "[NP <digits>]" tags
#   R-score = <number>      digits and dots following the "=" sign
# Any other line is ignored.
#
# Results are stored in @candidates, indexed by candidate id:
#   $candidates[$id]{proteins}   array ref of the "NP <digits>" tag strings
#   $candidates[$id]{rscore}     the R-score exactly as captured
#
# NOTE: @candidates should become a hash if the numbering has lots of big
# gaps, because an array allocates a slot for every skipped index.
my @candidates;
my $c_id;    # id from the most recent CANDIDATE line; undef until one is seen

LINE:
while ( my $line = <> ) {
    if ( $line =~ /CANDIDATE\s+(\d+)/ ) {
        $c_id = $1;
        next LINE;
    }

    # Work out which field (if any) this line contributes before touching
    # @candidates, so the "no current candidate" check lives in one place.
    my ( $field, $value );
    if ( $line =~ /DNA\s+.Note.\s*(\S*.*)/ ) {
        my $note = $1;    # copy before the helper runs its own match
        ( $field, $value ) = ( proteins => [ _extract_protein_ids($note) ] );
    }
    elsif ( $line =~ /R-score\s*=\s*([.\d]+)/ ) {
        ( $field, $value ) = ( rscore => $1 );
    }
    else {
        next LINE;
    }

    # An undefined index would silently coerce to 0 and create (or corrupt)
    # record 0, so data that precedes the first CANDIDATE line is skipped.
    if ( !defined $c_id ) {
        warn "input line $.: '$field' data found before any CANDIDATE line; skipped\n";
        next LINE;
    }

    $candidates[$c_id]{$field} = $value;
}

# Return every "NP <digits>" tag that appears inside square brackets in
# $text, in order of appearance and without the brackets. Returns an empty
# list when there are none.
sub _extract_protein_ids {
    my ($text) = @_;

    # A /g match in list context yields all captures at once.
    my @ids = $text =~ /\[(NP\s+\d+)\]/g;
    return @ids;
}