use warnings;
use strict;
use File::Basename;

# Restriction-site scanner for FASTA/FA files.
#
# Reads every sequence line of a FASTA file into one string, reports its
# length and how often each recognition site occurs (case-insensitively), then
# splits the sequence at every site and writes the length of each remaining
# piece, one per line.
#
# Invocation:
#   perl <script> <input.fasta>                       lengths go to STDOUT
#   perl <script> <input.fasta> <output file>         lengths go to the file
#   perl <script> <ignored> <input.fasta> <output>    legacy three-argument form

my $name  = basename($0);
my $usage = "\nUsage (OSX Terminal/Windows Cmd-Prompt): perl <$name> <.FASTA or.FA File> [Output File] \n\n";

# Recognition sites to scan for; they may contain IUPAC ambiguity codes.
my @recognition_sites = qw( GATCR GGCC );

# IUPAC ambiguity code => equivalent regex character class.
# (X is treated like N, i.e. "any base".)
my %iupac_class_for = (
    K => '[GT]',
    M => '[AC]',
    Y => '[CT]',
    S => '[CG]',
    W => '[AT]',
    B => '[CGT]',
    V => '[ACG]',
    H => '[ACT]',
    D => '[AGT]',
    X => '[AGCT]',
    R => '[AG]',
    N => '[AGCT]',
);

# Scanning for restriction sites and length-output.
#
# Arguments: the command-line arguments (see "Invocation" above).
# Dies with the usage text when no input file is given, and with a
# descriptive message when a file cannot be opened or written.
# Returns 1 on success.
sub main {
    my @args = @_;

    my $first = shift @args or die $usage;

    # Earlier versions ignored the first argument and took the input and
    # output paths from the second and third. Keep honouring that form so
    # an existing three-argument call never has its second argument (the
    # input file) mistaken for the output file and truncated.
    my ( $in_path, $out_path ) =
        @args >= 2 ? @args[ 0, 1 ] : ( $first, $args[0] );

    # Read the whole input before touching the output, so pointing both
    # paths at the same file cannot destroy the data before it is read.
    open( my $in, '<', $in_path )
        or die "Cannot open '$in_path' for reading: $!\n";
    my $DNA = read_fasta($in);
    close($in);

    my $len = length($$DNA);
    print "\n FASTA/Sequence Length is: $len bp \n";

    my @pats = map { _expand_iupac($_) } @recognition_sites;

    for my $pat (@pats) {
        # The empty-list assignment forces list context so that all
        # non-overlapping matches are counted.
        my $m = () = $$DNA =~ /$pat/gi;
        print "\n Total DNA matches to $pat are: $m \n";
    }

    my $out;
    if ( defined $out_path ) {
        open( $out, '>', $out_path )
            or die "Cannot open '$out_path' for writing: $!\n";
    }
    else {
        $out = \*STDOUT;
    }

    # Cut case-insensitively, matching the way the sites were counted above.
    # Notes on split: the matched site itself is removed, so each length is
    # that of the sequence *between* sites; a site at the very start yields a
    # leading 0, while trailing empty pieces are dropped.
    my $any_site = join( '|', @pats );
    for my $fragment ( split /$any_site/i, $$DNA ) {
        my $fragment_len = length($fragment);
        print {$out} "$fragment_len \n";
    }

    if ( defined $out_path ) {
        # Buffered write errors only surface at close.
        close($out) or die "Error writing '$out_path': $!\n";
    }

    return 1;
}

# Translate the IUPAC ambiguity codes in a recognition site into regex
# character classes (e.g. GATCR -> GATC[AG]).
#
# Argument: the site as an upper-case string.
# Returns:  the regex source as a string.
#
# A single pass is sufficient because the replacement classes consist only
# of A, C, G and T, none of which is itself an ambiguity code.
sub _expand_iupac {
    my ($site) = @_;

    ( my $regex = $site ) =~ s/([KMYSWBVHDXRN])/$iupac_class_for{$1}/g;

    return $regex;
}

# Subfunction - Reading formatted FASTA/FA files.
#
# Argument: an open, readable filehandle.
# Returns:  a reference to one string holding every non-header line
#           concatenated together. Header lines (starting with ">") are
#           skipped, so multiple records are merged into a single sequence.
sub read_fasta {
    my ($in) = @_;

    my $sequence = "";

    while ( my $line = <$in> ) {
        # Strip LF and CRLF line endings; chomp alone would leave "\r" inside
        # the sequence for Windows-formatted files.
        $line =~ s/[\r\n]+\z//;

        next if $line =~ /^>/;

        $sequence .= $line;
    }

    return ( \$sequence );
}

# Run only when executed as a script, not when loaded by other code.
main(@ARGV) unless caller;