#!/usr/bin/perl -w
use strict;
use warnings;
use Data::Dumper;

# Build a gene-symbol => gene-id lookup from a tab-separated CCDS table.
#
# Input : the file "CCDS.20090902.txt" in the current directory. Its first
#         line is treated as a header and skipped; every other line is split
#         on tabs, and the third and fourth columns become the hash key and
#         value respectively (see the sample rows after __DATA__).
# Output: one "key<TAB>value" line on STDOUT per data row, followed by the
#         number of distinct keys stored in the hash.
# Dies  : if the input file cannot be opened.

# Three-argument open with a lexical handle: the handle is scoped to this
# file and the file name can never be misread as an open mode.
open(my $in, '<', "CCDS.20090902.txt") or die " Can't open ccds file: $!";

# Lookup table filled from the third and fourth columns of each row.
my %geneids = ();

# Read and discard the header line so it is not stored as a data row.
my $header_line = <$in>;

while (my $line = <$in>) {
    chomp $line;    # remove the newline character

    my @fields = split(/\t/, $line);

    # Skip blank or truncated lines; without this guard the columns below
    # would be undef and an empty-string key would end up in the hash.
    next if @fields < 4;

    # Extract the columns that we are interested in.
    my ($key, $value) = ($fields[2], $fields[3]);

    # A key that appears on several rows keeps the value of its last row,
    # so the final hash size can be smaller than the number of rows printed.
    $geneids{$key} = $value;
    print "$key\t$value\n";
}

# Number of distinct keys collected.
print "Hash size: ", scalar keys %geneids, "\n";

close($in);

__DATA__
#chromosome	nc_accession	gene	gene_id	ccds_id	ccds_status
1	NC_000001.8	NCRNA00115	79854	CCDS1.1	Withdrawn
1	NC_000001.10	SAMD11	148398	CCDS2.2	Public
1	NC_000001.10	NOC2L	26155	CCDS3.1	Public
1	NC_000001.10	PLEKHN1	84069	CCDS4.1	Public
1	NC_000001.10	HES4	57801	CCDS5.1	Public
1	NC_000001.10	ISG15	9636	CCDS6.1	Public
1	NC_000001.10	C1orf159	54991	CCDS7.2	Public
1	NC_000001.10	TTLL10	254173	CCDS8.1	Public
1	NC_000001.10	TNFRSF18	8784	CCDS9.1	Public
1	NC_000001.10	TNFRSF18	8784	CCDS10.1	Public
1	NC_000001.10	TNFRSF4	7293	CCDS11.1	Public
1	NC_000001.10	SDF4	51150	CCDS12.1	Public
1	NC_000001.10	B3GALT6	126792	CCDS13.1	Public
1	NC_000001.10	UBE2J2	118424	CCDS14.1	Public