use strict;
use warnings;

# ---------------------------------------------------------------------------
# Main program: read a promoter sequence file and a motif file, translate the
# motif into a regular expression, and report how many times it matches the
# sequence.
# ---------------------------------------------------------------------------

# File-scope state. Some of these are not used by the statements below but are
# kept declared in case code elsewhere in the file refers to them.
my @file_data = ();
my %motif_hash = ();
my $dna;
my $regexp;
my @positions = ();
my $site;
my @motifs;
my $match;
my $count;

# Load the raw lines of the promoter file and join the non-header lines into
# one sequence string.
@file_data = get_file_data("promoters.txt");
$dna = extract_sequence(@file_data);

# Build the search pattern from the motif file.
$regexp = parseMOTIF('motifs.txt');
print "Regexp: '$regexp'\n";

# Search the sequence. match_positions expects the pattern first and the
# sequence second; passing anything else leaves it with nothing to search.
$match = match_positions($regexp, $dna);
push(@positions, $match);
print "match : $match";

# match_positions($regexp, $dna)
#
# Counts the non-overlapping matches of a pattern in a sequence string.
#
# Parameters:
#   $regexp - pattern text, interpolated into the match as a regular expression
#   $dna    - string to search
#
# Returns:
#   The number of matches found by a global (/g) scan of $dna. Returns 0 when
#   either argument is undefined or empty.
sub match_positions {
    my ($regexp, $dna) = @_;

    # An empty pattern must not reach the match operator: Perl treats // as
    # "reuse the last successfully matched pattern", which would silently
    # count matches of some unrelated regex.
    return 0 unless defined $regexp && length $regexp;
    return 0 unless defined $dna    && length $dna;

    my $count = 0;
    while ($dna =~ /$regexp/g) {
        ++$count;
    }
    return $count;
}

# parseMOTIF($filename)
#
# Reads a motif file and converts the motif sites it contains into regular
# expressions via IUB_to_regexp, printing each site and its regexp as it goes.
#
# A line is recognised as a motif entry when it starts with an identifier
# made of 'A' or 'S', one or more digits and an optional trailing 'b',
# followed by whitespace and a site written in upper-case letters. Anything
# after the site must be separated from it by whitespace.
#
# Parameters:
#   $filename - path of the motif file (read with get_file_data)
#
# Returns:
#   The regexp built from the LAST matching line of the file, or undef when
#   no line matches. Earlier motifs are printed but not returned.
sub parseMOTIF {
    my ($motif_path) = @_;

    my $regexp;

    foreach my $line (get_file_data($motif_path)) {

        # [AS] rather than [A|S]: inside a character class '|' is a literal
        # character, not alternation. The trailing part is optional so a
        # final line without a newline is still recognised.
        next unless $line =~ /^[AS]\d+b?\s+([A-Z]+)(?:\s.*)?$/;

        my $site = $1;
        $regexp = IUB_to_regexp($site);

        print "motif : $site\n";
        print "The regexp : $regexp\n";
    }

    return $regexp;
}

# get_file_data($filename)
#
# Reads a whole file into memory.
#
# Parameters:
#   $filename - path of the file to read
#
# Returns:
#   The lines of the file, line terminators included, as a list.
#
# On failure to open the file, prints a message (including the system error)
# to STDERR and terminates the program with a non-zero exit status.
sub get_file_data {
    my ($filename) = @_;

    # Three-argument open with an explicit read mode: the filename is taken
    # literally, so leading characters such as '>' or '|' cannot change what
    # the call does. A lexical handle avoids clobbering a global bareword.
    my $fh;
    unless (open($fh, '<', $filename)) {
        print STDERR "can't open file \"$filename\": $!\n\n";
        exit 1;
    }

    my @filedata = <$fh>;
    close $fh;

    return @filedata;
}

# extract_sequence(@file_data)
#
# Concatenates the given lines into one string, leaving out every line that
# begins with '>' (header lines in FASTA-style input).
#
# Parameters:
#   @file_data - lines to process, e.g. as returned by get_file_data
#
# Returns:
#   The remaining lines joined in their original order. Whitespace, including
#   line terminators, is kept exactly as it appears in the input.
sub extract_sequence {
    my @input_lines = @_;

    # Keep only the lines that are not headers.
    my @sequence_lines = grep { $_ !~ /^>/ } @input_lines;

    return join('', @sequence_lines);
}

# IUB_to_regexp($iub)
#
# Translates a string of IUB/IUPAC nucleotide codes into regular-expression
# text that matches any sequence the codes allow.
#
# Parameters:
#   $iub - string of upper-case IUB codes (A C G T R Y M K S W B D H V N)
#
# Returns:
#   Regexp text in which unambiguous bases stand for themselves and every
#   ambiguity code becomes a bracketed character class, e.g. 'GRA' becomes
#   'G[GA]A'. Characters that are not in the table are passed through
#   escaped, so they match only themselves. Returns '' for undefined or
#   empty input.
sub IUB_to_regexp {
    my ($iub) = @_;

    return '' unless defined $iub;

    # Ambiguity codes must be character classes: without the brackets, 'R'
    # would match the literal two-base sequence "GA" instead of "G or A".
    my %iub2char_class = (
        A => 'A',
        C => 'C',
        G => 'G',
        T => 'T',
        R => '[GA]',
        Y => '[CT]',
        M => '[AC]',
        K => '[GT]',
        S => '[GC]',
        W => '[AT]',
        B => '[CGT]',
        D => '[AGT]',
        H => '[ACT]',
        V => '[ACG]',
        N => '[ACGT]',
    );

    my $regular_expression = '';
    foreach my $code (split //, $iub) {
        $regular_expression .= exists $iub2char_class{$code}
            ? $iub2char_class{$code}
            : quotemeta($code);
    }

    return $regular_expression;
}