Assuming that you are storing your sequences in files with meaningful names, you could do something like this. Given these two example sequence files:
knoppix@Microknoppix:~/perl/Monks$ head -99 spw961600_seq*
==> spw961600_seqA <==
TCCAGATCCCTGGGGCCCCTGGGTGAGGGCAGCCAGACGCAACGTCTGGAGGAAGCT
==> spw961600_seqB <==
CTGCGTTTCGACGCCATGGCTGAGCTGGAGACGGTCCTGCCCATGCTGCTC
knoppix@Microknoppix:~/perl/Monks$
this script
# Count overlapping dimers (two-character windows) in every sequence file
# whose name matches spw961600_seq*. One tally is kept per file, keyed by
# the file name, plus a combined tally under the key "totals"; the whole
# structure is then dumped to STDOUT.
use strict;
use warnings;

use Data::Dumper;

my @seqFiles = glob q{spw961600_seq*};
my %counts;

foreach my $seqFile ( @seqFiles ) {

    # Slurp the whole file into one scalar. $/ is only undefined inside
    # this do-block, so line-at-a-time reading elsewhere is unaffected.
    my $seq = do {
        open my $seqFH, q{<}, $seqFile
            or die qq{open: < $seqFile: $!\n};
        local $/;
        <$seqFH>;
    };

    # The pattern is a zero-width look-ahead that captures the next two
    # characters without consuming them, so successive /g matches start
    # one character apart -- that is what makes the dimers overlap.
    # "." does not match a newline (no /s flag), so a pair that contains
    # a line break is never counted.
    while ( $seq =~ m{(?=(..))}g ) {
        my $dimer = $1;    # copy before anything can clobber $1
        $counts{ totals }->{ $dimer } ++;
        $counts{ $seqFile }->{ $dimer } ++;
    }
}

# Dump() rather than Dumpxs(): Dumpxs() only exists when the XS extension
# of Data::Dumper could be loaded, whereas Dump() uses the XS code when it
# is available and falls back to the pure-Perl implementation otherwise.
# The output is the same.
print Data::Dumper->Dump( [ \ %counts ], [ qw{ *counts } ] );
builds this data structure
%counts = ( 'spw961600_seqB' => { 'AC' => 2, 'AG' => 2, 'CC' => 4, 'TG' => 7, 'AT' => 2, 'TC' => 3, 'GA' => 4, 'TT' => 2, 'CT' => 6, 'GG' => 3, 'CG' => 4, 'CA' => 2, 'GC' => 7, 'GT' => 2 }, 'spw961600_seqA' => { 'AC' => 2, 'AG' => 6, 'CC' => 7, 'TG' => 4, 'AT' => 1, 'TC' => 3, 'AA' => 2, 'GA' => 5, 'CT' => 4, 'CG' => 2, 'GG' => 9, 'GC' => 5, 'CA' => 4, 'GT' => 2 }, 'totals' => { 'AC' => 4, 'AG' => 8, 'CC' => 11, 'TG' => 11, 'AT' => 3, 'TC' => 6, 'AA' => 2, 'GA' => 9, 'TT' => 2, 'CT' => 10, 'CG' => 6, 'GG' => 12, 'GC' => 12, 'CA' => 6, 'GT' => 4 } );
I hope I have understood your question correctly and that this will help you move forward.
Cheers,
JohnGG
In reply to Re: Counting overlapping dimers for multiple sequences
by johngg
in thread Counting overlapping dimers for multiple sequences
by sally123
| For: | Use: |
|------|------|
| & | `&amp;` |
| < | `&lt;` |
| > | `&gt;` |
| [ | `&#91;` |
| ] | `&#93;` |