in reply to Re^6: mismatching characters in dna sequence
in thread mismatching characters in dna sequence

basically output the total number of each type of transition?

Per sequence compared, or for all the sequences?


With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.

The start of some sanity?

  • Comment on Re^7: mismatching characters in dna sequence

Replies are listed 'Best First'.
Re^8: mismatching characters in dna sequence
by prbndr (Acolyte) on Dec 30, 2011 at 21:05 UTC
    for all the sequences

      Like this?

      #! perl -slw use strict; use Inline C => Config => BUILD_NOISY => 1; use Inline C => <<'END_C', NAME => 'dnacmp', CLEAN_AFTER_BUILD => 0; void dnacmp( char *a, char *b ) { int n = 0; Inline_Stack_Vars; Inline_Stack_Reset; while( *a && *b ) { if( *a != *b ) { Inline_Stack_Push( sv_2mortal( newSVpvf( "%d:%c:%c", n, *a +, *b ) ) ); } a++; b++; n++; } Inline_Stack_Done; return; } SV *dnacmp2( char *a, char *b ) { SV *res = newSVpv( "", 0 ); int n = 0; while( *a && *b ) { if( *a != *b ) sv_catpvf( res, "%d:%c:%c ", n, *a, *b ); ++a, ++b, ++n; } return res; } END_C use Data::Dump qw[ pp ]; sub rndStr{ join'', @_[ map{ rand @_ } 1 .. shift ] } our $N //= 10e3; our $L //= 8; my @seqs; $#seqs = $N-1; $seqs[ $_ ] = rndStr( $L, qw[ A C G T ] ) for 0 .. $#seqs; my %stats; for my $i ( 0 .. $#seqs ) { for my $j ( $i+1 .. $#seqs ) { m[\d+:(.:.)] and ++$stats{ $1 } for dnacmp( @seqs[ $i, $j ] ); } } pp \%stats; __END__ C:\test>dnacmp -N=10 -L=40 { "A:C" => 102, "A:G" => 137, "A:T" => 113, "C:A" => 105, "C:G" => 125, "C:T" => 113, "G:A" => 112, "G:C" => 96, "G:T" => 110, "T:A" => 117, "T:C" => 84, "T:G" => 144, }

      With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
      Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
      "Science is about questioning the status quo. Questioning authority".
      In the absence of evidence, opinion is indistinguishable from prejudice.

      The start of some sanity?

        yup. i don't get the output though...is it a Data::Dump problem? here's my code:
        #!/opt/local/bin/perl use Inline C => Config => BUILD_NOISY => 1; use Inline C => <<'END_C', NAME => 'dnacmp', CLEAN_AFTER_BUILD => 0; void dnacmp( char *a, char *b ) { int n = 0; Inline_Stack_Vars; Inline_Stack_Reset; while( *a && *b ) { if( *a != *b ) { Inline_Stack_Push( sv_2mortal( newSVpvf( "%d:%c:%c", n, *a +, *b ) ) ); } a++; b++; n++; } Inline_Stack_Done; return; } SV *dnacmp2( char *a, char *b ) { SV *res = newSVpv( "", 0 ); int n = 0; while( *a && *b ) { if( *a != *b ) sv_catpvf( res, "%d>%c:%c ", n, *a, *b ); ++a, ++b, ++n; } return res; } END_C use Bio::Seq; use Bio::DB::Sam; use Data::Dump qw[ pp ]; my $bamfile = Bio::DB::Sam->new( -bam => "eg1.bam" ); my @allReads = $bamfile->features( -type => 'match' ); my %stats; for my $read (@allReads) { my ( $ref, $matches, $query ) = $read->padded_alignment; m[\d+:(.:.)] and ++$stats{ $1 } for dnacmp2( $ref, $query ); print $read->qname, "\n", dnacmp2( $ref, $query ), "\n"; } pp \%stats;