#Discovery of simple motifs in DNA sequences #Input Files:Foreground sequences and background sequences. #!/usr/bin/perl #----------------------------------------------------------------------- #initialize variables #------------------------------------------------------------------------ use strict; use warnings; my $fgcount; my $count=0; my $motif; my @motifs=''; my $dna =''; my $seq=''; my $sequence=''; my $sequences=''; #my $line=''; my @dna = ( ); my $file = ' '; my @file =( ); my @rna; my $length ; my $combinations ; my $combinations2 ; my $counter; $seq='foreground_faa.txt'; #------------------------------------------- #subroutine to open file for readability #------------------------------------------- sub get_file{ @dna = @_; unless (open(IFILE, $seq)) { print "could not open file sequence!\n"; exit; } @dna = ; close IFILE; return @dna; } #-------------------------------------------- #main program. #-------------------------------------------- get_file($seq); $file=extract_sequence(@dna); join_seq($sequence); create_motif($combinations); create_motif2($combinations2); get_all_motifs(@motifs); #get_motif(@file); #get_motif($dna); #-------------------------------------------- #subroutine to extract seq from header file #-------------------------------------------- sub extract_sequence { #initialize variables @dna=@_; $sequence=''; foreach my $line (@dna){ if ($line =~ /^\s*$/) { #discard blank line next; }elsif ($line =~ /^>/){ #discard fasta header next; }else { $sequence .=$line; } } #print $sequence,"\n"; return $sequence; } #-------------------------------- #Subroutine to join sequences #-------------------------------- sub join_seq { @file = split ('',$file); #print @file, "\n"; return @file; } #------------------------------------------------------------------------------ #Subroutines to discover motifs of the lenghth 5 #------------------------------------------------------------------------------ sub create_motif { #creates avery possible 5-mer my $base = join ",", qw/A T C G/; my $L = 5; #my $L2 = 6; my $string = "{$base}" x $L; my $combinations = glob $string; #print join ":", $combinations; return join ":", $combinations; } #------------------------------------------------------------------------------ #Subroutines to discover motifs of the lenghth 6 #------------------------------------------------------------------------------ sub create_motif2 { #creates every possible 6-mer my $base2 = join ",", qw/a t c g/; my $L2 = 6; my $string2 = "{$base2}" x $L2; my $combinations2 = glob $string2; #print join ":", $combinations2; return join ":", $combinations2; } #---------------------------------------------------------------------- #subroutine check all the available motifs in the file #---------------------------------------------------------------------- sub get_all_motifs { $combinations=$combinations2; @motifs=$combinations; print @motifs; return @motifs; } foreach $combinations (@motifs){ foreach $sequence(@file) { if($sequence =~/$combinations/){ $counter++; } } print $sequence; return $sequence; #exit; } #---------------------------------------------------------------------- #subroutine check all the available motifs in the file #---------------------------------------------------------------------- sub calculate_percentage { #my $percentage; my $percentage = $counter / scalar(@file); print "$percentage is "; if ($percentage =>90); #print $motif; #return $motif_sequences; }