#!/usr/bin/perl use strict; use Data::Dumper; use Storable; ## 5_prime_ref.fasta => # >chr1 67093601 67093610 CTACAGGAA # >chr1 67095418 67095427 TTTCAGAAA # >chr1 67096318 67096327 TTTCAGCTC # >chr1 67115461 67115470 TTCCAGGCA # >chr1 67125906 67125915 CTGCAGATA # >chr1 67127254 67127263 AAATAGGAA # >chr1 67131224 67131233 TAATAGATT # >chr1 67093601 67093610 CTACAGGAA # >chr1 67096318 67096327 TTTCAGCTC # >chr1 67103379 67103388 GTTTAGATG # >chr1 67111641 67111650 CTTTAGAGC # >chr1 67115461 67115470 TTCCAGGCA # >chr1 67125906 67125915 CTGCAGATA # >chr1 67127254 67127263 AAATAGGAA # >chr1 67131224 67131233 TAATAGATT # >chr1 67093601 67093610 CTACAGGAA # >chr1 67096318 67096327 TTTCAGCTC # >chr1 67103379 67103388 GTTTAGATG # >chr1 67111641 67111650 CTTTAGAGC # >chr1 67113753 67113762 ATATAGTGA # >chr1 201324579 201324588 GGGTAAGGT open my $fasta5, '<', '5_prime_ref.fasta'; chomp (my @fasta5 = <$fasta5>); close $fasta5; my ($altsequence1, $altsequence2, $altsequence3, @newfasta5, @newfasta3, $sequence, @splity, $test, $j, $genomicposition, $ref, $alt1, $alt2, $alt3, $refsequence); ## for 5 prime ## >chr1 201324579 201324588 GGGTAAGGT foreach my $line (@fasta5) { ## need to iterate 9 times through j... $j = 0; while ($j < 9) { @splity = split ("\t", $line); my $ref = $splity[3]; $sequence = $splity[3]; $refsequence = $splity[3]; $genomicposition = $splity[1]; $genomicposition = $genomicposition + 1; #first nucleotide is counted as first column + 1, not as the left boundary $splity[0] = ">".$splity[0]; ##if ($character $j in sequence =~ matches A) { if (substr ($sequence, $j, 1) =~ m/A/) { $ref = "A"; $genomicposition = $genomicposition + $j; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/C/; $altsequence1 = $sequence; $alt1 = "C"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/G/; $altsequence2 = $sequence; $alt2 = "G"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/T/; $altsequence3 = $sequence; $alt3 = "T"; } elsif (substr ($sequence, $j, 1) =~ m/C/) { $test = substr ($sequence, $j, 1); $ref = "C"; $genomicposition = $genomicposition + $j; $sequence =~ s/$test/A/; $altsequence1 = $sequence; $alt1 = "A"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/G/; $altsequence2 = $sequence; $alt2 = "G"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/T/; $altsequence3 = $sequence; $alt3 = "T"; } elsif (substr ($sequence, $j, 1) =~ m/T/) { $test = substr ($sequence, $j, 1); $ref = "T"; $genomicposition = $genomicposition + $j; $sequence =~ s/$test/A/; $altsequence1 = $sequence; $alt1 = "A"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/G/; $altsequence2 = $sequence; $alt2 = "G"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/C/; $altsequence3 = $sequence; $alt3 = "C"; } else { $ref = "G"; $genomicposition = $genomicposition + $j; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/A/; $altsequence1 = $sequence; $alt1 = "A"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/T/; $altsequence2 = $sequence; $alt2 = "T"; $test = substr ($sequence, $j, 1); $sequence =~ s/$test/C/; $altsequence3 = $sequence; $alt3 = "C"; } ## print the new 3 lines my $newfasta5alt1 = join ("\t", $refsequence, $splity[0], $splity[1], $splity[2], $altsequence1, $ref, $alt1, $genomicposition); my $newfasta5alt2 = join ("\t", $refsequence, $splity[0], $splity[1], $splity[2], $altsequence2, $ref, $alt2, $genomicposition); my $newfasta5alt3 = join ("\t", $refsequence, $splity[0], $splity[1], $splity[2], $altsequence3, $ref, $alt3, $genomicposition); my $newfasta5 = join ("\n", $newfasta5alt1, $altsequence1, $newfasta5alt2, $altsequence2, $newfasta5alt3, $altsequence3); push (@newfasta5, $newfasta5); $j++;} next; } print Dumper @newfasta5;