#!/usr/bin/perl
#
# Reformat cleaned batch Entrez Gene output into a gene table.
#
# Reads "clean.txt" line by line and writes "genetable.txt":
#   * A record header line ("<digits>: <text>") is split into a locus tag
#     (its first whitespace-separated token) and the remaining name; each is
#     written on its own line.
#   * "Function Evidence", "Process Evidence" and "Component Evidence"
#     heading lines are dropped.
#   * Lines mentioning a KEGG pathway have the "KEGG pathway: " label removed.
#   * Every other line is copied through unchanged.
#
# Dies if either file cannot be opened or the output cannot be closed.

use strict;
use warnings;

# Output of batch Entrez Gene, already cleaned.
my $infile = "clean.txt";
open(my $in, '<', $infile) or die "can't open file: $!";

my $outfile = "genetable.txt";
open(my $out, '>', $outfile) or die "can't open file: $!";

# Read one line at a time from the input handle.
while (my $line = <$in>) {

    # Record header: dissect it into locus tag and name.
    if ($line =~ /^(\d+:\s\w.+)/) {
        my $name = $line;

        # Drop the first organism bracket (or a bare "[") and the leading
        # "<digits>: " record number.
        $name =~ s/(\[\sArabidopsis\sthaliana\s\])|(\[\sArabidopsis\s)|(\[)//;
        $name =~ s/^\d+:\s//;

        # The locus tag is the first whitespace-separated token.
        my @fields    = split(/\s+/, $name);
        my $locus_tag = $fields[0];
        print {$out} "$locus_tag\n \n";

        # Strip the locus tag from the front of the name.
        # NOTE(review): as written the "^" anchor binds only to the AT\w+
        # alternative; (\w+) may match anywhere. Kept as-is to preserve
        # output -- confirm whether /^(?:AT\w+|\w+)/ was intended.
        $name =~ s/^(AT\w+)|(\w+)//;
        $name =~ s/^\s//;

        # $name still carries the input line's newline, so this emits a
        # blank line after the name (original output format).
        print {$out} "$name\n";

        # The header line itself is never copied to the output.
        next;
    }

    # Skip the evidence section headings.
    next if $line =~ /(^Function\sEvidence)|^(Process\sEvidence)|^(Component\sEvidence)/;

    if ($line =~ /(KEGG\spathway:)|(\w+\d\d\d\d\d\s)/) {
        # Remove "KEGG pathway: " from the KEGG description: the leading
        # label is deleted outright; any further labels on the same line
        # end up replaced by a single space.
        my $kegg = $line;
        $kegg =~ s/^(KEGG\spathway:\s)//;
        $kegg =~ s/KEGG/ KEGG/g;
        $kegg =~ s/(KEGG\spathway:\s)//g;
        print {$out} $kegg;
    }
    else {
        print {$out} $line;
    }
}

close($in);

# Buffered write errors only surface at close, so check it.
close($out) or die "can't close $outfile: $!";