in reply to Palindrome sequence from file containing mutliple sequences
#!/usr/bin/perl
# Scan a sequence file for "palindromes": a run of CAGU characters (an arm),
# followed by a short gap of CAGU characters, followed by the reverse
# complement of the arm (C<->G, A<->U). Hits are counted per contig header
# and printed as "arm*gap*reverse-complement => count".
use warnings;
use strict;

my $input_file = 'sample.txt';
my $max_lines  = 1000;    # stop after this many input lines
my $min_arm    = 5;       # shortest arm length searched
my $max_arm    = 20;      # longest arm length searched
my $max_gap    = 15;      # maximum number of bases between the two arms

open my $IN, '<', $input_file or die "Cannot open $input_file: $!";

# A header line starts with "Contig", optional spaces and digits, then whitespace.
my $pat = qr/^(Contig *([0-9]*))\s/;

# One precompiled pattern per arm length, shortest first.
my @regexes;
for my $n ($min_arm .. $max_arm) {
    push @regexes, qr/[CAGU]{$n}/;
}

my %palhash;        # $palhash{header line}{palindrome} = number of occurrences
my $contig = '';    # most recent header line; '' until the first header is seen
my $count  = 0;     # number of lines read so far

LINE: while ($count < $max_lines) {
    my $line = <$IN>;
    defined $line or last;

    # The whole header line (including its newline) is used as the hash key.
    $contig = $line if $line =~ /$pat/;
    ++$count;

    for my $value (@regexes) {
        my $found = 0;

        # Capture the arm explicitly and take the remainder of the line via
        # pos() instead of reading $& and $'.
        while ($line =~ /($value)/g) {
            my $match   = $1;
            my $endline = substr $line, pos($line);

            my $revmatch = reverse($match);
            $revmatch =~ tr/CAGU/GUCA/;

            # The reverse complement must follow within $max_gap bases.
            if ($endline =~ /^([CAGU]{0,$max_gap})($revmatch)/) {
                $found = 1;
                my $palindrome = $match . "*" . $1 . "*" . $2;
                $palhash{$contig}{$palindrome}++;
            }
        }

        # No hit at this arm length: skip the longer lengths for this line.
        next LINE if $found == 0;
    }
}
close $IN;

# Report: each header line followed by its palindromes and their counts.
for my $contig (keys %palhash) {
    print $contig;
    while (my ($key, $value) = each(%{ $palhash{$contig} })) {
        print "$key => $value\n";
    }
}
I changed some parts of the code as well, e.g. three-argument open with "or die", a lexical filehandle, skipping the rest of the processing when the input is too short, and no goto. I don't understand the requirements in detail, but moving away from $' and $& would be a good idea, too.
|
|---|
| Replies are listed 'Best First'. | |
|---|---|
|
Re^2: Palindrome sequence from file containing mutliple sequences
by reciter (Novice) on Feb 21, 2015 at 04:49 UTC |