Regex used: /^$RE{num}{real}\s+(\d+)\s+\.\.\s+(\d+)\s*/
>hsa_circ_0075116|chr5:175956288-175956388-|NM_014901|RNF44 FORWARD
-4.6 12 .. 35 xxxxGTGTGTGGTCT GC TTCAGTGACTTCGAGGCGCG GC AGCTGCTCCGAGTCC
-5.5 11 .. 36 xxxxxGTGTGTGGTC TGC TTCAGTGACTTCGAGGCGCG GCA GCTGCTCCGAGTCCT
####
Dumper:
$VAR1 = 'hsa_circ_0075116|chr5:175956288-175956388-|NM_014901|RNF44 FORWARD';
$VAR2 = [
{
'end' => '35',
'start' => '12'
},
{
'end' => '36',
'start' => '11'
}
####
New regex: /^(\s+)?$RE{num}{real}\s+(\d+)\s+\.\.\s+(\d+)\s*/
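## Change: (\s+)? was added at the beginning to allow optional leading whitespace.
## Note that (\s+)? is a capturing group, so it becomes $1 and the two (\d+) groups shift to $2 and $3.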
*\s*-5 56 .. 70 CTATGCCCCTTATTG TATCTG GGG CAGATG ATCGTCAAGTGAAGA
####
$VAR125 = 'hsa_circ_0067224|chr3:128345575-128345675-|NM_002950|RPN1 FORWARD';
$VAR126 = [
{
'end' => '6',
'start' => undef
}
####
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use Regexp::Common qw /number/;
# Read the hairpin results file and build a hash of arrays: each key is a
# header line (text after the leading '>'), each value is a list of
# { start => ..., end => ... } hashes taken from the data lines that follow
# that header.
my $hairpin_path = "new_xt_spacer_results.hairpin";
open my $hairpin_file, '<', $hairpin_path
    or die "Cannot open '$hairpin_path': $!";

my %HoA_sequences;
my $curkey;    # header currently being filled; undef until the first header

while (<$hairpin_file>) {
    chomp;

    # Header line, e.g.
    #   >hsa_circ_0075116|chr5:175956288-175956388-|NM_014901|RNF44 FORWARD
    # NOTE(review): the class [-|+] also accepts a literal '|'; presumably
    # only the strand characters '-' and '+' were intended -- confirm.
    if (/^>(\w+\d+\|\w+:\d+-\d+[-|+]\|\w+\|\w+\s+\w+$)/) {
        $curkey = $1;
    }
    # Data line: optional leading whitespace, a real number, then
    # "<start> .. <end>".  The leading whitespace is matched with a plain
    # \s* rather than the capturing group (\s+)?: in list context a match
    # returns every capture group, so an extra group in front would land in
    # $start (as undef when absent) and push the real start into $end.
    # $RE{num}{real} is used without -keep, so the two (\d+) groups are
    # expected to be the only captures.
    elsif (my ($start, $end) =
        /^\s*$RE{num}{real}\s+(\d+)\s+\.\.\s+(\d+)\s*/) {
        die "value seen before header: '$_'"
            unless defined $curkey;
        push @{ $HoA_sequences{$curkey} },
            { start => $start, end => $end };
    }
    else {
        die "don't know how to parse: '$_'";
    }
}

close $hairpin_file;

# NOTE(review): passing the hash itself flattens it into a key/value list,
# so Dumper prints $VAR1, $VAR2, ... pairs; Dumper(\%HoA_sequences) would
# print a single nested structure instead. Left unchanged to keep the
# existing output format.
print Dumper(%HoA_sequences);