use strict;
use warnings;

# Reads a sequence from promoters.txt and a motif table from motifs.txt,
# then reports how many times the motif regexp returned by parseMOTIF
# occurs in the sequence.
#
# File-scoped variables. Several of them are not referenced by the code
# visible here; they are kept in case code beyond the visible text uses them.
my @file_data  = ();
my %motif_hash = ();
my $dna;
my $regexp;
my @positions  = ();
my $site;
my @motifs;
my $match;
my $count;

@file_data = get_file_data("promoters.txt");
$dna       = extract_sequence(@file_data);
$regexp    = parseMOTIF('motifs.txt');

# parseMOTIF returns undef when no line of the motif file is recognised.
defined $regexp
    or die "no motif line recognised in motifs.txt\n";
print "Regexp: '$regexp'\n";

# match_positions expects (pattern, sequence); the earlier call passed a
# single undefined value, so nothing could ever match.
$match = match_positions($regexp, $dna);
push(@positions, $match);
print "match : $match";

# match_positions($regexp, $dna)
#
# Counts the non-overlapping matches of $regexp in $dna.
#
#   $regexp - pattern text, interpolated into a match as a regular expression
#   $dna    - string to search
#
# Returns the number of matches; 0 when either argument is undefined or empty.
sub match_positions {
    my ($regexp, $dna) = @_;

    # An empty pattern makes Perl reuse the last successfully matched
    # pattern, so it must never reach the match operator.
    return 0 unless defined $regexp && length $regexp;
    return 0 unless defined $dna    && length $dna;

    my $count = 0;
    ++$count while $dna =~ /$regexp/g;
    return $count;
}

# parseMOTIF($motiffile)
#
# Reads $motiffile and, for every line of the form
#     <A|S><digits>[b] <SITE> <anything>
# converts SITE (upper-case letters) with IUB_to_regexp and prints both the
# site and the resulting regexp.
#
# Returns the regexp of the LAST recognised line, or undef when no line
# matched. Regexps of earlier lines are printed but not returned.
sub parseMOTIF {
    my ($motif_path) = @_;
    my $regexp;

    for my $line (get_file_data($motif_path)) {
        # Identifier is 'A' or 'S', digits, and an optional trailing 'b'.
        next unless $line =~ m{ ^ [AS] \d+ b? \s+ ([A-Z]+) \s+ .* $ }x;
        my $site = $1;

        $regexp = IUB_to_regexp($site);
        print "motif : $site\n";
        print "The regexp : $regexp\n";
    }

    return $regexp;
}

# get_file_data($filename)
#
# Returns the lines of $filename as a list, line endings included.
# Dies with the system error text when the file cannot be opened
# (previously this printed a message and exited with a success status).
sub get_file_data {
    my ($filename) = @_;

    open(my $fh, '<', $filename)
        or die "can't open file '$filename': $!\n";
    my @filedata = <$fh>;
    close $fh;

    return @filedata;
}

# extract_sequence(@file_data)
#
# Concatenates every line that does not start with '>' and returns the
# result as a single string.
sub extract_sequence {
    my (@file_data) = @_;

    my $sequence = join '', grep { !/^>/ } @file_data;

    # NOTE(review): the whitespace strip below is disabled in the original,
    # so the returned string keeps its line endings and a motif that spans
    # a line break will not match. Confirm whether that is intended.
    #$sequence =~ s/\s//g;

    return $sequence;
}

# IUB_to_regexp is cut off at the end of the visible text, so the fragment
# below is left exactly as found.
sub IUB_to_regexp{ my($iub) = @_; my $regular_expression =""; my %iub2char_class = ( A =>'A', C =>'C', G =>'G', T =>'T', R =>'GA', Y =>'CT', M =>'AC', K =>'GT', S =>'GC', W =>'AT', B =>'CGT', D =>'AGT', H =>'ACT', V =>'ACG', N =>'ACGT', ); for (my $i=0;$i