# Parse alignment output (records separated by two blank lines) into a
# hash of hashes and dump it for inspection.
#
# Layout of the resulting structure ($pkey is just a 1-based record counter):
#
#   $c->{$pkey}{comments}                  leading "# ..." line of the record, if any
#   $c->{$pkey}{stats}                     whitespace-split fields of the stats line, if any
#   $c->{$pkey}{left_instance}{sequence}   concatenated upper "Sbjct:" rows
#   $c->{$pkey}{right_instance}{sequence}  concatenated lower "Sbjct:" rows
#
# NOTE(review): the real structure presumably has a $c->{$pkey}{match}
# entry as well, but its meaning is unknown, so it is not populated here.
#
# NOTE(review): the sample input under __DATA__ was reconstructed from a
# paste that had lost its line breaks.  Two blank lines between records are
# required by the record separator; the single blank line between alignment
# blocks and the column of each "!" marker are assumptions -- confirm
# against real input.

use strict;
use warnings;
use Data::Dumper;

# Sorted keys keep the verification dump stable from run to run.
local $Data::Dumper::Sortkeys = 1;

my $c = _parse_records( \*DATA );

#print resulting structure for verification
print Dumper $c;

# _parse_records($fh)
#
# Reads records from the filehandle $fh and returns a hash reference keyed
# by a record counter starting at 1.  For every record:
#   * an optional leading comment line ("#...") is stored under {comments};
#   * unless the remaining text starts with "Sbjct", the first run of text
#     that begins with a digit is handed to _load_stats();
#   * every pair of "Sbjct:" rows (optionally separated by a "!" marker
#     line) is appended to {left_instance}{sequence} and
#     {right_instance}{sequence} respectively.
#
# The counter advances for every record read, so a record that yields no
# data leaves a gap in the keys.  A hash keyed by a counter is kept for
# compatibility; an array of hashes filled with push would make the counter
# unnecessary.
sub _parse_records {
    my ($fh) = @_;

    # Records are separated by two blank lines; localized so that the
    # caller's input record separator is left untouched.
    local $/ = "\n\n\n";

    my %records;
    my $pkey = 1;

    while ( defined( my $record = <$fh> ) ) {

        #get comments if any
        if ( $record =~ s/^(#[^\n]+)\n// ) {
            $records{$pkey}{comments} = $1;
        }

        #get stats if any
        if ( $record !~ /^Sbjct/ and $record =~ s/\s*(\d[^\n]+)// ) {
            _load_stats( $pkey, $1, \%records );
        }

        #loop over remaining data to get sequences
        while ( $record =~ /Sbjct: ([-ACGT]+)\s+\d+\n(?:\s*!\s+)?Sbjct: ([-ACTG]+)/g ) {
            $records{$pkey}{left_instance}{sequence}  .= $1;
            $records{$pkey}{right_instance}{sequence} .= $2;
        }

        $pkey++;
    }

    return \%records;
}

# _load_stats($pkey, $data, $href)
#
# Splits the stats line $data on whitespace and stores the fields as an
# array reference in $href->{$pkey}{stats}.
#
# NOTE(review): placeholder -- what the real _load_stats does is unknown,
# so this will need changing.
sub _load_stats {
    my ( $pkey, $data, $href ) = @_;
    $href->{$pkey}{stats} = [ split ' ', $data ];
    return;
}    ##_load_stats

__DATA__
# args=-supermax -d -l 50 -h 7 -seedlength 7 -evalue 0.0001 -s 60 /home/anonymous_monk/clusters/all/all_clusters
140 8778 333 D 140 8778 334 -7 3.60e-56 -259 95.00
Sbjct: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 393
       !
Sbjct: -AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 394

Sbjct: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGCCTT 453
Sbjct: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGCCTT 454

Sbjct: TTAAAATTCCCCCC-GGGGGG 474
                     !
Sbjct: TTAAAATTCCCCCCGGGGGGG 475


170 8778 333 D 140 8778 334 -7 3.60e-56 -259 95.00
Sbjct: CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 393
       !
Sbjct: -CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 394

Sbjct: CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGCCTT 453
Sbjct: TAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGCCTT 454

Sbjct: GTAAAATTCCCCCC-GGGGGG 474
                     !
Sbjct: GTAAAATTCCCCCCGGGGGGG 475


# args=-supermax -d -l 50 -h 7 -seedlength 7 -evalue 0.0001 -s 655 /home/anonymous_monk/clusters/all/all_clusters
Sbjct: CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 393
       !
Sbjct: -CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 394

Sbjct: CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGCCTT 453
Sbjct: TAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGGGGGGGGGCCTT 454

Sbjct: CCAAAATTCCCCCC-GGGGGG 474
                     !
Sbjct: CCAAAATTCCCCCCGGGGGGG 475