Somewhere inside you see the code citromatik added... It will take me a day to explain all this in English. However, citromatik's code worked OK, but I realized I need to also add some boundaries as to where the string can expand...$arxeio_structures = $ARGV[0]; $arxeio_seqs = $ARGV[1]; $arxeio_align = $arxeio_seqs.'.align.final_align'; $/="//\n"; #apo to arxeio domwn apothikeuw +AC, akolouthia kai consensus tis topologias open DOMES, $arxeio_structures; while(<DOMES>) { if($_=~/^>(\w{6})\|.*?\t([\w\d\_]+)/m) { $structure_prot=$_; $tm_ac=$1; $tm_pfam=$2; if($structure_prot=~/^SEQ:(.*)/m) { $tm_seq=$1; } if($structure_prot=~/^CON:(.*)/m) { $tm_cons=$1; } } $hash_structure{$tm_ac} = $tm_seq; $hash_cons{$tm_ac} = $tm_cons; #$HoA_3D{$tm_ac} = [$tm_pfam, $tm_seq, $tm_cons]; # Ektypwsi +: print "$ac: @{ $HoA_3D{$ac} }\n"; $tm_ac=$tm_seq=$tm_cons=$tm_pfam=''; } close DOMES; $/="\n"; open ALIGN, $arxeio_align; #apo to arxeio alignment + apothikeuw kathe AC, tin antistoixi while(<ALIGN>) #stoixismeni seq kai apo pou + ksekinaei i seq tis stoixisis { #(giati exoume steilei mono to kommat +i pou xtypaei sto Pfam gia stoixisi) if($_=~/^(.*?)\|(\d+)-\d+\t(.*)/) { $ac_align=$1; $arxi_align=$2; $seq_align = $3; $hash_align{$ac_align}=$seq_align; $hash_start_align{$ac_align}=$arxi_align-1; } } close ALIGN; foreach $a(keys %hash_align) { if(exists($hash_structure{$a})) #an i akolouthia ex +ei 3-D domi, ftiaxnw neo LBL seq me vasi tis allages pou exei epifere +i i stoixisi (- kai .) { $seq_structure = $hash_structure{$a}; #print $seq_struc +ture."\n"; $lbl_structure = $hash_cons{$a}; #print $lbl_structure. +"\n"; $seq_apo_alignment = $hash_align{$a}; $thesi_arxis_alignment = $hash_start_align{$a}; @split_structure_lbl = split(//,$lbl_structure); @split_seq_apo_alignment = split(//,$seq_apo_alignment); $count_trash=0; for ($i=0; $i<=$#split_seq_apo_alignment; $i++) { if($split_seq_apo_alignment[$i] ne '-' && $split_seq_apo_a +lignment[$i] ne '.') { $thesi_wanted=$thesi_arxis_alignment + ($i-$count_tras +h); #print $i."\t".$split_seq_apo_alignment[$i]."\t".$coun +t_trash."\t".$thesi_wanted."\t".$split_structure_lbl[$thesi_wanted]." +\n"; $lbl_structure_new.=$split_structure_lbl[$thesi_wanted +]; } else { $count_trash++; #print $i."\t".$split_seq_apo_alignment[$i]."\t".$coun +t_trash."\t".$thesi_wanted."\t".$split_seq_apo_alignment[$i]."\n"; $lbl_structure_new.=$split_seq_apo_alignment[$i]; } } print $lbl_structure_new."\n"; while ($lbl_structure_new=~/[.-]*([^.-])(?:\1|[.-])*/g) { $arxi = $-[0]; $telos = $+[0]-1; $eidos = $1; push @AoA, [$eidos, $arxi, $telos]; } } else #ean i akolouthia den anikei se 3D dom +i, apla tin apothikeuw kai tin epeksergazomai parakatw { $not_3D{$a}=$hash_align{$a}; } } #for $i ( 0 .. $#AoA ) #{ # print "row $i is: @{$AoA[$i]}\n"; #} foreach $k(keys %not_3D) #pairnw twra tis akolouthi +es pou den exoun domi { $seq_no_3D=$not_3D{$k}; $copy_seq_no_3D=$seq_no_3D; #ftiaxnw antigrafo tis +akolouthias no3d kai diwxnw - kai . $copy_seq_no_3D =~ s/[-.]+//g; @split_copy_seq_no_3D=split(//, $copy_seq_no_3D); #split i akol +outhia align alla xwris - kai . @split_seq_no3D = split(//, $seq_no_3D); #split i original +akolouthia align print '>'.$k."\n"; print $seq_no_3D."\n\n"; $count_axrista=0; for $i ( 0 .. $#AoA ) { if($AoA[$i]->[0] eq 'I') { $arxi_I = $AoA[$i]->[1]; $telos_I = $AoA[$i]->[2]; $mikos_I = ($telos_I-$arxi_I)+1; $substring_I=substr($seq_no_3D,$arxi_I,$mikos_I); $axrista_I = ($substring_I =~ tr/-.//); $count_axrista=$axrista_I+$count_axrista; } elsif($AoA[$i]->[0] eq 'O') { $arxi_O = $AoA[$i]->[1]; $telos_O = $AoA[$i]->[2]; $mikos_O = ($telos_O-$arxi_O)+1; $substring_O=substr($seq_no_3D,$arxi_O,$mikos_O); $axrista_O = ($substring_O =~ tr/-.//); $count_axrista=$axrista_O+$count_axrista; } elsif($AoA[$i]->[0] eq 'M') #vriskw ola ta +TM kommatia tou AoA tis structure { $count_axrista_entering=$count_axrista; #print "row $i is: @{$AoA[$i]}\n"; $arxi_tm = $AoA[$i]->[1]; #arxi kai telos twn T +Ms stis seq twn ypoloipwn proteins tou align $telos_tm = $AoA[$i]->[2]; $mikos_tm = ($telos_tm-$arxi_tm)+1; $substring_M=substr($seq_no_3D,$arxi_tm,$mikos_tm); #print $substring_M."\t".length($substring_M)."\t".$arxi_t +m."\t".$telos_tm."\n"; $axrista_M = ($substring_M =~ tr/-.//); #yp +ologizw - kai . se kathe TM kommati $count_axrista_exiting=$axrista_M+$count_axrista; if($axrista_M>0) {$count_axrista=$count_axrista_exiting}; $substring_M=~ s/[-.]+//g; #diwnxnw ta +- kai . kai elegxw.... if(length($substring_M)>=7) #...an to TM pou ap +omenei exei mikos >=7, eimaste OK kai to kratame, alliws... { $thesi_telikis_arxis = $hash_start_align{$k}+$arxi_tm +-$count_axrista_entering; $thesi_telikou_telous = $hash_start_align{$k}+$telos_t +m-$count_axrista_exiting; #print 'MIKOS_MIN_7'."\t".$substring_M."\t".length($su +bstring_M)."\t".$thesi_telikis_arxis."\t".$thesi_telikou_telous."\t". +'BIKA ME: '."\t".$count_axrista_entering."\t".'VGIKA ME: '."\t".$coun +t_axrista_exiting."\t".'AXRISTA MESA: '."\t".$axrista_M."\n"; push @AoA, [$k, ($thesi_M_se_arxiki_seq+$arxi_tm), ($t +hesi_M_se_arxiki_seq+$mikos_tm)]; } else #stis parakatw periptwseis pre +pei na prostethoun AA sto Tm ,wste na ginei toulaxiston mikos 7 { $extra_length=7-length($substring_M); $arxi_epomeno_kommati=$AoA[$i+1]->[1]; $telos_epomeno_kommati=$AoA[$i+1]->[2]; $mikos_epomeno_kommati=$telos_epomeno_kommati-$arxi_ep +omeno_kommati+1; $arxi_proigoumeno_kommati=$AoA[$i-1]->[1]; $telos_proigoumeno_kommati=$AoA[$i-1]->[2]; $mikos_proigoumeno_kommati=$telos_proigoumeno_kommati- +$arxi_proigoumeno_kommati+1; $substring_proigoumeno = substr($seq_no_3D,$arxi_proig +oumeno_kommati,$mikos_proigoumeno_kommati); #ayto to kanw giati mp +orei to epomeno/proigoumeno kommati na exei 10AA $substring_epomeno = substr($seq_no_3D,$arxi_epomeno_k +ommati,$mikos_epomeno_kommati); #alla ta 7 na einai - i . $substring_proigoumeno=~ s/[-.]+//g; $substring_epomeno=~ s/[-.]+//g; $actual_length_proigoumeno = length($substring_proigou +meno); $actual_length_epomeno = length($substring_epomeno); if($split_seq_no3D[$telos_tm] ne '-' && $split_seq_no3 +D[$telos_tm] ne '.' && ($split_seq_no3D[$arxi_tm] eq '.' or $split_s +eq_no3D[$arxi_tm] eq '-')) #edw prepei na proekteinw pros ta arister +a [typos: ---...XX] { print $substring_M."\t".length($substring_M)."\t".$arx +i_tm."\t".$telos_tm."\t".'PROIGOUMENO:'."\t".$substring_proigoumeno." +\t".'EPOMENO:'."\t".$substring_epomeno."\n"; if ($actual_length_epomeno-$extra_length<1 && $act +ual_length_proigoumeno-$extra_length<1) #an de mporw na paw pouthe +na, to afinw ws exei { $final_tm=$substring_M; $thesi_telikis_arxis = $hash_start_align{$k}+$arx +i_tm-$count_axrista_entering; $thesi_telikou_telous = $hash_start_align{$k}+$tel +os_tm-$count_axrista_exiting; print 'EXEI ALLAGI:'."\t".$substring_M."\t".length +($substring_M)."\t".$thesi_telikis_arxis."\t".$thesi_telikou_telous." +\t".'BIKA ME: '."\t".$count_axrista_entering."\t".'VGIKA ME: '."\t".$ +count_axrista_exiting."\t".'AXRISTA MESA: '."\t".$axrista_M.":: DE MP +ORW"; print substr($copy_seq_no_3D,$thesi_telikis_arxis, +($thesi_telikou_telous-$thesi_telikis_arxis+1)); last; } elsif($actual_length_proigoumeno-$extra_length>=1) + #an to mikos tou proigoumeno kommatiou-ex +tra_AA>1, kanw kanonika tin proektasi pros ta aristera { $thesi=index($copy_seq_no_3D,$substring_M); $extra_AAs = substr($copy_seq_no_3D,$thesi-$extra_ +length,$extra_length); $final_tm=$extra_AAs.$substring_M; #$thesi_telikis_arxis = $hash_start_align{$k}+$ar +xi_tm-$count_axrista_entering-$extra_length; #$thesi_telikou_telous = $hash_start_align{$k}+$te +los_tm-$count_axrista_exiting; $thesi_telikis_arxis = $arxi_tm-$count_axrista_en +tering-$extra_length; $thesi_telikou_telous = $telos_tm-$count_axrista_e +xiting; print 'EXEI ALLAGI:'."\t".$substring_M."\t".length +($substring_M)."\t".'ARXIKA'.($hash_start_align{$k}+$arxi_tm-$count_a +xrista_entering)."-".($hash_start_align{$k}+$telos_tm-$count_axrista_ +exiting)."\t".'TELIKA: '.$thesi_telikis_arxis."-".$thesi_telikou_telo +us."\t".'BIKA ME: '."\t".$count_axrista_entering."\t".'VGIKA ME: '."\ +t".$count_axrista_exiting."\t".'AXRISTA MESA: '."\t".$axrista_M."::"; print substr($copy_seq_no_3D,$thesi_telikis_arxis, +($thesi_telikou_telous-$thesi_telikis_arxis+1)); last; } else # to paw kai pros tis 2 kateuthinseis, arkei +ta oria tou telikou TM na apexoun toulaxiston kata 1 AA apo to proigo +umeno/epomeno kommati($actual_length_epomeno-$extra_length>=1 && $act +ual_length_proigoumeno-$extra_length<1) #an exw provlima me to mik +os tou proigoumeno kommatiou, alla to epomeno-extra_AA>1 paw pros ta +deksia anagkastika { =pod my $str = "ABCDEFGHIJKLMNOP"; my $seed = "FGHI"; my $length = 7; my $slen = length $seed; my $ipos = index $str,$seed; my $lexpand = $slen < $length ? ($length - $slen)/2 : 0; $lexpand = $ipos < $lexpand ? $ipos : $lexpand; print +(substr $str,($ipos - $lexpand),$length),"\n"; =cut $thesi=index($copy_seq_no_3D,$substring_M); $orio_epomenou=$split_copy_seq_no_3D $extra_AAs = substr($copy_seq_no_3D,$thesi,$extra_ +length); $final_tm=$extra_AAs.$substring_M; $thesi_telikis_arxis = $hash_start_align{$k}+$arx +i_tm-$count_axrista_entering; $thesi_telikou_telous = $hash_start_align{$k}+$tel +os_tm-$count_axrista_exiting+$extra_length; print 'EXEI ALLAGI:'."\t".$substring_M."\t".length +($substring_M)."\t".$thesi_telikis_arxis."\t".$thesi_telikou_telous." +\t".'BIKA ME: '."\t".$count_axrista_entering."\t".'VGIKA ME: '."\t".$ +count_axrista_exiting."\t".'AXRISTA MESA: '."\t".$axrista_M."::"; print substr($copy_seq_no_3D,$thesi_telikis_arxis, +($thesi_telikou_telous-$thesi_telikis_arxis+1)); last; } } } } } print "\n"; }
In reply to Re: Can we "grow" a string?
by Anonymous Monk
in thread Can we "grow" a string?
by Anonymous Monk
| For: | Use: | ||
| & | & | ||
| < | < | ||
| > | > | ||
| [ | [ | ||
| ] | ] |