#Reference Francis
#
# Scores PubMed abstracts by how close a known protein name appears to a
# protein-related keyword ("inhibitor", "activity", ...).
#
# Inputs (read from the current directory):
#   Protein.csv      - header line, then one protein entry per row. The first
#                      two columns are concatenated and split on '+' to get
#                      the names/synonyms of that entry.
#   finalidlist.csv  - header line, then rows whose first column is a drug
#                      name and whose second column is a PubMed ID, or the
#                      literal "no" when no ID is available.
#
# Output:
#   output1.csv      - one row per (abstract, protein entry) pair in which at
#                      least one name of the entry was found. Every row is
#                      also echoed to STDOUT.
#
# Scoring, based on the minimum distance (counted in sentences) between a
# sentence containing a protein name and a sentence containing a keyword:
#   0 sentences apart -> 1      1 sentence apart -> 0.5
#   further apart     -> 0.2    no keyword / no PubMed ID -> 0
use strict;
use warnings;

use XML::Simple;
use LWP::UserAgent;
use HTTP::Request::Common;
use URI::Escape;
use Data::Dumper;
use Text::CSV;
use List::Util qw( min max );

my $EFETCH_URL   = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';
my $OUTPUT_FILE  = 'output1.csv';
my $PROTEIN_FILE = 'Protein.csv';
my $ID_FILE      = 'finalidlist.csv';

my @protein_keywords = (
    "inhibitors", "inhibitor", "activity", "activitor", "activities",
    "activated", "proteins", "deficiency", "levels", "functions",
    "reductions", "protease", "proteases", "complex concentrate",
);

# Compile the keyword patterns once instead of once per sentence.
my @keyword_patterns = map { qr/\b\Q$_\E\b/i } @protein_keywords;

# Reads the protein CSV and returns a list of entries. Each entry is an
# array reference of { name => ..., pattern => qr/.../ } hashes, one per
# non-empty name obtained by splitting "column0 . column1" on '+'.
# Dies if the file cannot be opened; unparsable rows are skipped with a
# warning.
sub load_protein_list {
    my ($path) = @_;

    my $parser = Text::CSV->new({ sep_char => ',' });
    open(my $in, '<', $path)
        or die "Cannot open '$path' for reading: $!\n";

    my @entries;
    while (my $row = <$in>) {
        next if $. == 1;    # header line
        chomp $row;
        if (!$parser->parse($row)) {
            warn "Skipping unparsable line $. of '$path'\n";
            next;
        }
        my @cols   = $parser->fields();
        my $joined = join '', map { defined $_ ? $_ : '' } @cols[0, 1];

        # Empty names are dropped: an empty name would become /\b\b/, which
        # matches every sentence that contains a word.
        my @names = grep { length } split /[+]/, $joined;
        next if !@names;

        # \Q..\E keeps characters such as '(' or '[' in a protein name from
        # being interpreted as regex syntax.
        push @entries,
            [ map { { name => $_, pattern => qr/\b\Q$_\E\b/i } } @names ];
    }
    close($in) or warn "Error closing '$path': $!\n";

    return @entries;
}

# Fetches the plain-text abstract for one PubMed ID via EFetch.
# Returns the response body, or nothing (undef in scalar context) when the
# request fails, after emitting a warning.
sub fetch_abstract {
    my ($ua, $id) = @_;

    my $request = HTTP::Request->new(POST => $EFETCH_URL);
    $request->content_type('application/x-www-form-urlencoded');
    $request->content(
        'db=pubmed&id=' . uri_escape($id) . '&retmode=text&rettype=abstract'
    );

    my $response = $ua->request($request);
    if (!$response->is_success) {
        warn "EFetch failed for PubMed ID '$id': "
            . $response->status_line . "\n";
        return;
    }
    return $response->content;
}

# Returns the indices of the sentences matched by each pattern, grouped by
# pattern (all hits of the first pattern, then the second, ...). A sentence
# index can therefore appear more than once.
sub keyword_positions {
    my ($sentences, $patterns) = @_;

    my @positions;
    for my $pattern (@{$patterns}) {
        for my $index (0 .. $#{$sentences}) {
            push @positions, $index if $sentences->[$index] =~ $pattern;
        }
    }
    return @positions;
}

# Looks for every name of one protein entry in every sentence. A sentence is
# split on commas first and each comma-separated part is tested on its own,
# so one sentence can contribute several hits.
# NOTE(review): the original author's comments say the comma splitting was
# added to separate comma-delimited items and that matching here was still
# not working as intended.
# Returns (\@sentence_indices, $last_matched_name).
sub protein_positions {
    my ($sentences, $entry) = @_;

    my @positions;
    my $matched_name = "";
    for my $index (0 .. $#{$sentences}) {
        my @parts = split /[,]/, $sentences->[$index];
        for my $candidate (@{$entry}) {
            for my $part (@parts) {
                next if $part !~ $candidate->{pattern};
                $matched_name = $candidate->{name};
                push @positions, $index;
            }
        }
    }
    return (\@positions, $matched_name);
}

# Smallest absolute difference between any protein sentence index and any
# keyword sentence index. Returns undef when either list is empty.
sub min_separation {
    my ($protein_pos, $keyword_pos) = @_;

    return undef if !@{$protein_pos} || !@{$keyword_pos};

    my @gaps;
    for my $protein_index (@{$protein_pos}) {
        push @gaps, map { abs($protein_index - $_) } @{$keyword_pos};
    }
    return min(@gaps);
}

# Renders a list of sentence indices in the "+3+7" form used in the report.
sub format_positions {
    my (@positions) = @_;
    return join '', map { "+$_" } @positions;
}

# Writes one report row, joined with ", ", to the result file and STDOUT.
sub write_row {
    my ($out, @columns) = @_;

    my $row = join(', ', @columns) . "\n";
    print {$out} $row or die "Cannot write to '$OUTPUT_FILE': $!\n";
    print $row;
    return;
}

my $ua  = LWP::UserAgent->new;
my $csv = Text::CSV->new({ sep_char => ',' });

#Open result CSV file.
open(my $fh, '>', $OUTPUT_FILE)
    or die "Cannot open '$OUTPUT_FILE' for writing: $!\n";
print {$fh} "Pubmed ID, Drug Name, Position of keywords, Valid Proteins, Position of proteins, Minimum separation, Scoring\n"
    or die "Cannot write to '$OUTPUT_FILE': $!\n";

#Load the protein names to look for.
my @protein_list = load_protein_list($PROTEIN_FILE);

#Open specified CSV file
open(my $data, '<', $ID_FILE)
    or die "Cannot open '$ID_FILE' for reading: $!\n";

LINE:
while (my $line = <$data>) {
    next LINE if $. == 1;    # header line
    chomp $line;

    if (!$csv->parse($line)) {
        warn "Skipping unparsable line $. of '$ID_FILE'\n";
        next LINE;
    }
    my ($drug_name, $id) = ($csv->fields())[0, 1];
    $drug_name = '' if !defined $drug_name;

    if (!defined $id || $id eq '') {
        warn "Skipping line $. of '$ID_FILE': no PubMed ID column\n";
        next LINE;
    }

    # "no" marks a drug for which no PubMed ID is available.
    if ($id eq "no") {
        print "not matched\n";
        write_row($fh, 'Undef', $drug_name, '', 'Unmatched proteins', '', '', 0);
        next LINE;
    }

    my $content = fetch_abstract($ua, $id);
    next LINE if !defined $content;

    # Sentences are approximated by splitting on '.'.
    my @sentences     = split /[.]/, $content;
    my @keyword_pos   = keyword_positions(\@sentences, \@keyword_patterns);
    my $keyword_field = format_positions(@keyword_pos);

    for my $entry (@protein_list) {
        my ($protein_pos, $valid_protein) = protein_positions(\@sentences, $entry);
        next if !@{$protein_pos};

        my $protein_field = format_positions(@{$protein_pos});
        my $separation    = min_separation($protein_pos, \@keyword_pos);

        if (!defined $separation) {
            # No keyword anywhere in the abstract: there is no distance to
            # measure, so this must not be scored as a same-sentence match.
            print "not matched\n";
            write_row($fh, $id, $drug_name, $keyword_field,
                'Unmatched proteins', $protein_field, '', 0);
        }
        elsif ($separation == 0) {
            print "$valid_protein\n";
            write_row($fh, $id, $drug_name, $keyword_field,
                $valid_protein, $protein_field, $separation, 1);
        }
        elsif ($separation == 1) {
            print "not matched\n";
            write_row($fh, $id, $drug_name, $keyword_field,
                'Unmatched proteins', $protein_field, $separation, 0.5);
        }
        else {
            print "not matched\n";
            write_row($fh, $id, $drug_name, $keyword_field,
                'Unmatched proteins', $protein_field, $separation, 0.2);
        }
    }
}

close($data) or warn "Error closing '$ID_FILE': $!\n";
# Buffered write errors only surface at close, so check it.
close($fh) or die "Error closing '$OUTPUT_FILE': $!\n";