#Written by Prasuna Dahal
#
# Reads PubMed IDs from the first column of test.csv, fetches the abstract
# text for each ID from NCBI EFetch, and writes to test1.csv (and STDOUT)
# every token of the abstract that matches both a protein name from
# proteinlist.csv and one of the keywords in @kwrds.
use strict;
use warnings;
use XML::Simple;
use LWP::UserAgent;
use HTTP::Request::Common;
use URI::Escape;
use Text::CSV;
use Data::Dumper;

# Keywords that mark a protein mention as relevant. This must be a list
# (parentheses); braces would store a single hash reference in the array.
my @kwrds = (
    "inhibitors", "activity",   "complex",   "activator",
    "activities", "activated",  "proteins",  "deficiency",
    "levels",     "functions",  "reductions", "protease",
    "proteases",
);

my $file       = "proteinlist.csv";
my $efetch_url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
my $ua         = LWP::UserAgent->new;
my $csv        = Text::CSV->new({sep_char => ','});

# Load the protein names (first comma-separated column) once, up front,
# instead of re-reading the file for every word of every abstract.
my @keystr;
open(my $data1, '<', $file) or die "Cannot open $file: $!";
while (my $line1 = <$data1>) {
    chomp $line1;
    my ($name) = split /,/, $line1;
    # Skip blank lines: an empty name would build a pattern that matches
    # every token.
    next unless defined $name && length $name;
    push @keystr, $name;
}
close($data1);

# Precompile one case-insensitive, word-bounded alternation per list.
# quotemeta keeps characters such as '(' or '+' in a name from being
# interpreted as regex syntax. An empty protein list yields a pattern
# that never matches.
my $protein_re = @keystr
    ? do { my $alt = join '|', map { quotemeta } @keystr; qr/\b(?:$alt)\b/i }
    : qr/(?!)/;
my $keyword_re = do {
    my $alt = join '|', map { quotemeta } @kwrds;
    qr/\b(?:$alt)\b/i;
};

#Open the result in a CSV file
open(my $fh, '>', "test1.csv") or die "Cannot open test1.csv for writing: $!";
print $fh "Valid Proteins\n";

#Open the file containing the PubMed IDs
open(my $data, '<', "test.csv") or die "Cannot open test.csv: $!";

#Skip 1st line (header)
my $header = <$data>;

while (my $line = <$data>) {
    chomp $line;
    next unless $csv->parse($line);

    my @fields = $csv->fields();
    next unless defined $fields[0] && length $fields[0];

    #Replace (-) with (,) so several IDs can be sent in one request
    (my $id = $fields[0]) =~ tr/-/,/;

    #Initialize http request
    my $args = "db=pubmed&id=$id&retmode=text&rettype=abstract";
    my $req  = HTTP::Request->new(POST => $efetch_url);
    $req->content_type('application/x-www-form-urlencoded');
    $req->content($args);

    #Get response; skip this ID rather than scanning an error page
    my $response = $ua->request($req);
    unless ($response->is_success) {
        warn "EFetch request for id '$id' failed: " . $response->status_line . "\n";
        next;
    }
    my $content = $response->content;

    # Split the abstract into sentences on '.', then into tokens on
    # space or comma.
    for my $sentence (split /[.]/, $content) {
        for my $word (split /[ ,]/, $sentence) {
            # NOTE(review): as in the original, both patterns are tested
            # against the same token, so this fires only for compound
            # tokens (e.g. hyphenated "name-activated"). If sentence-level
            # co-occurrence was intended, test $keyword_re against
            # $sentence instead -- confirm with the author.
            next unless $word =~ $protein_re && $word =~ $keyword_re;

            print $fh "$word\n";
            print "$word\n";
        }
    }
}

close($data);
# Buffered write errors surface at close, so check it on the output handle.
close($fh) or die "Error closing test1.csv: $!";