#!/usr/bin/perl
# Task: Extract GeneID-Number and gene information
#
# Reads the tab-separated file "Genomteil.gff" line by line. For each row:
#   * a row whose type column (3rd) contains "gene" starts a new record made
#     of that row's 4th, 5th and 7th columns;
#   * a row whose type column contains "CDS" or "exon" appends that type to
#     the current record;
#   * a row whose attribute column (9th) ends in "db_xref=GeneID:<digits>"
#     stores a copy of the current record under that GeneID.
# Afterwards one line "GeneID => <first element of its record>" is printed
# per GeneID, in ascending numeric GeneID order.
use strict;
use warnings;

my $gff_file = "Genomteil.gff";

my @gene_record;    # fields collected since the most recent "gene" row
my %record_for;     # GeneID => array ref (snapshot of @gene_record)

open my $in, '<', $gff_file or die "Cannot open $gff_file: $!";

while (my $line = <$in>) {
    # Strip the line ending up front so the GeneID match below works for
    # LF and CRLF files alike, and for a last line without a newline.
    $line =~ s/\r?\n\z//;

    # LIMIT -1 keeps trailing empty columns, so the column count is reliable.
    my @fields = split /\t/, $line, -1;

    # Rows with fewer than nine columns (e.g. blank lines) carry no
    # type/attribute pair to inspect; skipping them also avoids
    # "uninitialized value" warnings.
    next if @fields < 9;

    my $type       = $fields[2];
    my $attributes = $fields[8];

    # A "gene" row opens a new record: columns 4, 5 and 7.
    if ($type =~ /gene/) {
        @gene_record = @fields[3, 4, 6];
    }

    # CDS and exon rows contribute their feature type to the open record.
    if ($type =~ /CDS/ or $type =~ /exon/) {
        push @gene_record, $type;
    }

    # The GeneID must be the last entry of the attribute column. Store a
    # copy so that later changes to @gene_record do not alter saved records.
    if ($attributes =~ /(?:\A|;)db_xref=GeneID:(\d+)\z/) {
        $record_for{$1} = [@gene_record];
    }
}

close $in;

# Print the first element of each gene's OWN record. The record is reached
# through the hash, not through the parsing array, which only ever holds the
# most recently parsed record.
for my $gene_id (sort { $a <=> $b } keys %record_for) {
    my $first = $record_for{$gene_id}[0];
    $first = '' unless defined $first;    # GeneID seen before any record data
    print "$gene_id => $first\n";
}