#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Regexp::Common qw /number/;

# Parse a hairpin results file into a hash of arrays:
#     header line  =>  [ { start => N, end => M }, ... ]
#
# Expected input layout (one header followed by any number of value lines):
#
#   >hsa_circ_0075116|chr5:175956288-175956388-|NM_014901|RNF44 FORWARD
#   -4.6 12 .. 35 xxxxGTGTGTGGTCT GC TTCAGTGACTTCGAGGCGCG GC AGCTGCTCCGAGTCC
#   -5.5 11 .. 36 xxxxxGTGTGTGGTC TGC TTCAGTGACTTCGAGGCGCG GCA GCTGCTCCGAGTCCT
#
# which yields for that header:
#
#   [ { 'start' => '12', 'end' => '35' }, { 'start' => '11', 'end' => '36' } ]
#
# History of the value-line regex:
#
#   Original:  /^$RE{num}{real}\s+(\d+)\s+\.\.\s+(\d+)\s*/
#       Worked, but rejected value lines that begin with whitespace, e.g.
#       "  -5 56 .. 70 CTATGCCCCTTATTG TATCTG GGG CAGATG ATCGTCAAGTGAAGA".
#
#   Attempt:   /^(\s+)?$RE{num}{real}\s+(\d+)\s+\.\.\s+(\d+)\s*/
#       The added (\s+)? is a *capturing* group, so it became $1 and pushed
#       the two positions to $2 and $3.  The two-element list assignment
#       then stored the whitespace capture (or undef) in $start and the
#       first position in $end, producing records such as
#       { 'end' => '6', 'start' => undef }.
#
#   Current:   leading whitespace is consumed with a non-capturing \s*, so
#       the only capture groups are the start and end positions again.
#       ($RE{num}{real} does not capture unless -keep is requested.)

my $hairpin_path = "new_xt_spacer_results.hairpin";

open my $hairpin_file, '<', $hairpin_path
    or die "Cannot open '$hairpin_path': $!";

my %HoA_sequences;    # header text => array ref of { start, end } records
my $curkey;           # most recently seen header; undef until the first one

LINE:
while ( my $line = <$hairpin_file> ) {
    chomp $line;

    # Header line: ">id|chr:from-to<strand>|transcript|gene DIRECTION".
    # The strand is a single '-' or '+'; inside a character class '|' is a
    # literal, so it must not be listed there.
    if ( $line =~ /^>(\w+\d+\|\w+:\d+-\d+[-+]\|\w+\|\w+\s+\w+)$/ ) {
        $curkey = $1;
        next LINE;
    }

    # Value line: optional indentation, a real number, then "start .. end".
    # Anything after the end position (the sequence columns) is ignored.
    if ( my ( $start, $end )
        = $line =~ /^\s*$RE{num}{real}\s+(\d+)\s+\.\.\s+(\d+)/ )
    {
        die "value seen before header: '$line'" unless defined $curkey;
        push @{ $HoA_sequences{$curkey} }, { start => $start, end => $end };
        next LINE;
    }

    # Be strict: any other line means the file is not in the expected format.
    die "don't know how to parse: '$line'";
}

close $hairpin_file
    or die "Cannot close '$hairpin_path': $!";

# Pass a reference so the whole structure is dumped as one $VAR1 hash
# instead of being flattened into alternating $VAR1 (key) / $VAR2 (value)...
local $Data::Dumper::Sortkeys = 1;    # stable, diff-friendly output
print Dumper( \%HoA_sequences );