#!/usr/bin/perl
#
# Build a Bio::SimpleAlign from the name/sequence records embedded after
# __DATA__.
#
# Each record is "<name> <sequence>".  A name may appear on several lines;
# its sequence chunks are concatenated in input order.  The name is expected
# to end in coordinates, either
#
#     <id>/<start>-<end>
#     <id>/<ns>-<s>:<e>-<ne>
#
# Progress is printed to STDOUT as the records are read and as each sequence
# is added to the alignment.

use strict;
use warnings;

use Bio::SimpleAlign;
use Bio::LocatableSeq;    # loaded explicitly rather than relying on Bio::SimpleAlign

$| = 1;    ## buffering off

my %align;     # name => concatenated sequence string
my %c2name;    # 1-based first-seen index => name (preserves input order)
my $count = 0;

my $alignment = Bio::SimpleAlign->new();

print "---> Reading data\n";

while ( my $line = <DATA> ) {

    # Skip anything that is not "<name> <sequence>"; a leading '#' marks a
    # line that must be ignored.
    next unless $line =~ /^([^\#]\S+)\s+([A-Za-z\.\-]+)\s*/;

    my ( $name, $seq ) = ( $1, $2 );

    # Number each distinct name the first time it is seen.
    if ( !exists $align{$name} ) {
        $count++;
        $c2name{$count} = $name;
    }

    $align{$name} .= $seq;

    print "Count >$count< - Adding Name >$name<\n\tSeq >$seq<\n";
}

print "---> Forming alignment\n";

$count = 0;

foreach my $no ( sort { $a <=> $b } keys %c2name ) {
    my $name = $c2name{$no};
    my ( $seqname, $start, $end, $strand );

    if ( $name =~ m{(\S+)/(\d+)-(\d+)$} ) {
        ( $seqname, $start, $end ) = ( $1, $2, $3 );
    }
    elsif ( $name =~ m{(\S+)/(\d+)-(\d+):(\d+)-(\d+)} ) {
        $seqname = $1;
        my ( $ns, $s, $e, $ne ) = ( $2, $3, $4, $5 );

        # Start and end are passed as "a-b" range strings, not plain numbers.
        $start  = "$ns-$s";
        $end    = "$e-$ne";    # surprise: this is legal
        $strand = 1;
    }
    else {
        # No coordinates found: use the whole name as the id so that an
        # undefined id is never printed or handed to Bio::LocatableSeq.
        # NOTE(review): start/end stay undefined here; confirm that is
        # acceptable to the BioPerl version in use.
        warn "No coordinates found in name >$name<; using it as the id\n";
        $seqname = $name;
    }

    ## make sure id is unique
    #$seqname .= 'x' while ( exists $align{id}{$seqname} );
    #++$align{id}{$seqname};

    print "Name >$name<\n\tID >$seqname<\n";

    my $seq = Bio::LocatableSeq->new(
        '-seq'    => $align{$name},
        '-id'     => $seqname,
        '-start'  => $start,
        '-end'    => $end,
        '-strand' => $strand,
        '-type'   => 'aligned',
    );

    $alignment->add_seq($seq);

    $count++;
}

print "Count : $count\n";

__DATA__
hit1_EF374296.1_1-432/1-432 uauGGAAACWUACU
hit1_AM161438.1_1-497/20-516 gAGAAACCCUGGAA
hit1_AM161438.1_1-497/1-1:497-993 gGAAAAUCCGUCGA
hit1_EF374296.1_1-432/1-1:432-863 UGAAAAUCCGUCGA
hit1_EF374296.1_509-949/509-509:949-1389 GGAAAAUCCGUCGA
hit1_EF374296.1_509-949/938-1382 AUAGUAAGAGGAAA
hit1_EF374297.1_30-470/30-30:470-910 GGAAAAUCCGUCGA