#!/usr/bin/perl
# http://perlmonks.org/?node_id=1128822
use strict;
use warnings;
# Sample input: two whitespace-separated tables held in one string.  The
# tables MUST be separated by an empty line; the pattern in join_tables()
# uses that empty line to tell the first table from the second.
#   first table rows : key, single letter, value
#   second table rows: two ignored fields, name, label, key
my $input = <<'END';
1 H 35
1 C 22
2 H 20
2 C 30

A 1 HB2 MET 1
A 2 CA MET 1
A 3 HA ASP 2
A 4 CA ASP 2
END

# Output wanted:
#   1 MET HB2 35
#   1 MET CA 22
#   2 ASP HA 20
#   2 ASP CA 30

# Compact form of the pattern that join_tables() spells out below:
#   print "$1 $5 $4 $3\n" while /^(\S+)\s+(\w)\s+(\S+)(?=.*\n\n.*^\S+\s+\S+\s+(\2\S*)\s+(\S+)\s+\1\b)/gms;

# join_tables($text)
#
# Joins every row of the first table in $text with a row of the second table
# using one regex: the row is matched normally and the partner row is located
# inside a zero-width lookahead, so the match position only ever advances
# through the first table.
#
# A partner row is one whose third field starts with the letter from the
# first-table row and whose fifth field equals that row's first field.  When
# several partner rows qualify, the greedy ".*" selects the last one.
#
# Returns a list of newline-terminated strings "key label name value", in
# first-table order.  Returns an empty list when $text is undefined, has no
# empty separator line, or has no matching rows.
sub join_tables {
    my ($text) = @_;
    return if !defined $text;

    my @joined;
    while (
        $text =~ m{
            ^           # starting at the start of a line
            (\S+)       # capture first field
            \s+         # skip whitespace
            (\w)        # capture letter in column 2
            \s+         # skip whitespace
            (\S+)       # capture third field
            (?=         # zero-width positive lookahead
                .*      # skipping to
                \n\n    # the empty line separating first and second table;
                        # this guarantees the patterns above this are in the
                        # first table and the patterns below are in the second
                .*      # skipping to
                ^       # start of a line in second table
                \S+     # skip first field (not needed)
                \s+     # skip whitespace
                \S+     # skip second field (not needed)
                \s+     # skip whitespace
                (\2\S*) # capture third field if it starts with the previously
                        # captured letter; any width, because a fixed
                        # three-character match misses two-character names
                \s+     # skip whitespace
                (\S+)   # capture fourth field
                \s+     # skip whitespace
                \1      # fifth field must equal first field of first table
                \b      # ensure complete match
            )           # end of zero-width lookahead
        }gmsx           # global, ^ matches at any line start, . matches newline
      )
    {
        # Copy the captures straight away; any later match would clobber them.
        my ( $key, $value, $name, $label ) = ( $1, $3, $4, $5 );
        push @joined, "$key $label $name $value\n";
    }
    return @joined;
}

print join_tables($input);
__END__
|