#!/usr/bin/perl
# Task: Extract GeneID-Number and gene information
#
# Reads a tab-separated GFF file. For every line whose third column is 'gene'
# and whose ninth (attribute) column contains ";db_xref=GeneID:<digits>", the
# script looks at the line directly following it. If that line's third column
# is exactly 'CDS' or 'exon', the record
#
#     GeneID => [ column 4, column 5, column 7, type of the following line ]
#
# is stored (columns 4, 5 and 7 are start, end and strand in the GFF format).
# Problems are reported on STDOUT together with the current input line number,
# and the collected hash is dumped with Data::Dump at the end.
#
# Usage: perl <this script> [file.gff]
#        Without an argument the file "Genomteil.gff" is read.

use strict;
use warnings;
use Data::Dump;

# Input file: optional first command-line argument, otherwise the historical
# hard-coded file name, so existing invocations keep working.
my $gff_file = @ARGV ? $ARGV[0] : 'Genomteil.gff';

# GeneID => [ column 4, column 5, column 7, type of following line ].
# Deliberately left undefined until the first record is stored, so that an
# input without any usable gene is dumped as "undef".
my $hr_data;

# 1) Open the .gff input file; it is read line by line and each line is split
#    at the tab characters.
open(my $in, '<', $gff_file) or die "Cannot open '$gff_file': $!";

# A look-ahead line that turned out not to be CDS/exon. It is examined again
# as a potential 'gene' line instead of being thrown away; otherwise a gene
# that directly follows another gene would be lost.
my $pending;

LINE:
while (1) {
    my $line1;
    if (defined $pending) {
        $line1   = $pending;    # already chomped when it was read
        $pending = undef;
    }
    else {
        $line1 = readline($in);
        last LINE unless defined $line1;    # end of file
        chomp($line1);                      # removes trailing \n
    }

    my @a_line1 = split(/\t/, $line1);

    # Header, comment and blank lines have no third column; skip them quietly
    # instead of comparing an undefined value.
    next LINE unless defined $a_line1[2] && $a_line1[2] eq 'gene';

    # List-context match: $GeneID stays undefined if the attribute column is
    # missing or carries no GeneID cross reference.
    my ($GeneID) =
        defined $a_line1[8]
        ? $a_line1[8] =~ /.*;db_xref=GeneID:(\d+)/
        : ();

    unless (defined $GeneID) {
        print("Error: 'gene' textblock found, but no GeneID present at line [$.]\n");
        next LINE;
    }

    # We found a GeneID. Create a record (array reference) with the data from
    # this line; it becomes the hash value further down.
    my $ar_record = [ @a_line1[ 3, 4, 6 ] ];

    # Also read the next line, which we expect to contain CDS or exon.
    # At end of file readline() returns undef, which is handled like any
    # other unexpected line.
    my $line2 = readline($in);
    my $type2;
    if (defined $line2) {
        chomp($line2);
        $type2 = (split(/\t/, $line2))[2];
    }

    # Anchored on purpose: the type must be exactly 'CDS' or 'exon', not
    # merely contain one of these words.
    if (defined $type2 && $type2 =~ /\A(?:CDS|exon)\z/) {
        push(@{$ar_record}, $type2);
        $hr_data->{$GeneID} = $ar_record;
    }
    else {
        print("Error: next line does not contain CDS or exon [$.]\n");
        $pending = $line2;    # undef at end of file, which ends the loop
    }
}

close $in;

Data::Dump::dd($hr_data);