#!/usr/bin/perl
use strict;
use warnings;

# Modified version of cleangene.
#
# Reads the cleaned batch Entrez Gene output from clean.txt and writes a
# reformatted gene table to genetable.txt. Both files are opened up front and
# the input is processed one line at a time.

# convert_gene_records($in_fh, $out_fh)
#
# Reads lines from $in_fh and prints the reformatted table to $out_fh.
#
#   * A record header line ("<digits>: <text>") is split into the locus tag
#     (its first whitespace-separated word) and the remaining name text, with
#     the "[ Arabidopsis thaliana ]" bracket text removed.
#   * A "Function Evidence" line emits "Unknown" when no KEGG line has been
#     seen since the previous "Function Evidence" line (or the start of the
#     input).
#   * "Function/Process/Component Evidence" lines and record header lines are
#     never copied to the output verbatim.
#   * KEGG lines have their "KEGG pathway: " labels stripped; every other
#     line is copied through unchanged.
#
# Parameters: $in_fh  - readable filehandle
#             $out_fh - writable filehandle
# Returns:    nothing.
sub convert_gene_records {
    my ($in_fh, $out_fh) = @_;

    # True once a KEGG line has been printed for the current record.
    my $kegg_found = 0;

    # Reading one line at a time using the filehandle.
    LINE:
    while (my $line = <$in_fh>) {
        if ($line =~ /^\d+:\s\w.+/) {
            # Dissecting the first line into locus tag and name.
            my $name = $line;
            $name =~ s/\[\sArabidopsis\sthaliana\s\]|\[\sArabidopsis\s|\[//;
            $name =~ s/^\d+:\s//;
            my @array = split /\s+/, $name;
            my $locus_tag = $array[0];
            print {$out_fh} "$locus_tag\n \n";

            # Drop the leading locus tag so only the gene name remains.
            $name =~ s/^AT\w+|\w+//;
            $name =~ s/^\s//;

            # $name still carries the input line's own newline, so this
            # leaves a blank line after the name.
            print {$out_fh} "$name\n";
        }

        if ($line =~ /^Function\sEvidence/) {
            print {$out_fh} "Unknown\n\n" unless $kegg_found;
            $kegg_found = 0;
        }

        # Section headings and record headers are handled above and must not
        # be echoed to the output.
        next LINE
            if $line =~ /^Function\sEvidence|^Process\sEvidence|^Component\sEvidence|^\d+:\s\w.+/;

        if ($line =~ /KEGG\spathway:|\w+\d{5}\s/) {
            # Removing "KEGG pathway" from the KEGG description. A space is
            # inserted before each remaining "KEGG" first so that entries
            # that were run together stay separated once the label is gone.
            my $kegg = $line;
            $kegg =~ s/^KEGG\spathway:\s//;
            $kegg =~ s/KEGG/ KEGG/g;
            $kegg =~ s/KEGG\spathway:\s//g;
            print {$out_fh} "$kegg";
            $kegg_found = 1;
        }
        else {
            print {$out_fh} $line;
        }
    }

    return;
}

# main()
#
# Opens clean.txt for reading and genetable.txt for writing, converts the
# records, and closes both files. Dies if either file cannot be opened or if
# the output file cannot be closed cleanly.
sub main {
    my $infile = "clean.txt";    # output of batch entrez gene cleaned
    open my $in_fh, '<', $infile or die "can't open file: $!";

    my $outfile = "genetable.txt";
    open my $out_fh, '>', $outfile or die "can't open file: $!";

    convert_gene_records($in_fh, $out_fh);

    # Buffered write errors only surface at close, so check it.
    close $out_fh or die "can't close file: $!";
    close $in_fh;

    return;
}

# Run only when executed as a script, not when loaded by another file.
main() unless caller;

# Notes retained from the original paste:
#### use strict; use warnings;
#### Scalar value @array[0] better written as $array[0]