in reply to Matching elements in two arrays and printing the element next to the match.
With this solution, we look at each line of fileA and fileB exactly once, and we use a hash lookup on IDs, which is fast. This reduces our complexity from the O(n^2) or worse of the previous solution to something closer to O(n log n), and possibly close to O(n) if we're lucky with our ID hashing.
#!/usr/bin/perl
use warnings;
use strict;

# Print the records of fileb whose id is listed in filea, one record per
# output line in the form "id<TAB>letters".
#
# filea: whitespace-separated id strings, any number per line.
# fileb: either "id <whitespace> letters" on a single line, or the id and
#        the letters on two consecutive lines (samples follow __END__).
#
# The two fileb passes below are alternatives for those two layouts. Both
# are run; with the sample layouts shown after __END__, the pass that does
# not fit the file prints nothing. Delete the one you do not need.

# Open filea and parse all id strings.
# Add the id strings as keys to the %wanted hash.
my %wanted;
{
    # "or", not "||": "||" binds to the filename string (always true), so
    # with "||" a failed open would never reach die.
    open my $file, '<', 'filea' or die "failed to open filea : $!";
    while ( my $line = <$file> ) {
        # split ' ' ignores leading whitespace and the trailing newline, so
        # an indented line does not produce an empty-string id.
        $wanted{$_}++ for split ' ', $line;
    }
    close $file;
}

# Read fileb, parse lines of the form "id <whitespace> letters"
# and print the lines that match the id strings from filea.
{
    open my $file, '<', 'fileb' or die "failed to open fileb : $!";
    while ( my $line = <$file> ) {
        my ( $id, $letters ) = split ' ', $line;

        # Blank lines and lines holding a single field are not records in
        # this layout; skipping them also avoids printing an undef value.
        next unless defined $letters;

        print "$id\t$letters\n" if $wanted{$id};
    }
    close $file;
}

# OR
# Read fileb, parse records of the form "id <newline> letters"
# and print the records that match the id strings from filea.
{
    open my $file, '<', 'fileb' or die "failed to open fileb : $!";
    while ( my $id = <$file> ) {
        my $letters = <$file>;

        # An odd number of lines leaves a trailing id with no letters line.
        last unless defined $letters;

        chomp $id;
        chomp $letters;
        print "$id\t$letters\n" if $wanted{$id};
    }
    close $file;
}

__END__

FileA:
1DWK 2RFK 4ERH

FileB:
1DWK HRSDKKDAHJKLSDLDLLJDGHDFJJE
4ERH DFSKFHADFSBVHFWIHFWJBFS
2RFK DADUHRQWERKBNJAIJDLAJDKAKDNAKDJKSADJKAHDJASHRWEUB

FileB (alternate):
1DWK
HRSDKKDAHJKLSDLDLLJDGHDFJJE
4ERH
DFSKFHADFSBVHFWIHFWJBFS
2RFK
DADUHRQWERKBNJAIJDLAJDKAKDNAKDJKSADJKAHDJASHRWEUB
|
|---|