in reply to parsing with two files
Your problem description was not precise. Neither your text nor the example data made clear whether multiple event ids can follow one code id: in one sentence you used the plural for event ids, and in another you used the singular.
Assuming that only one event id can follow one code id, I would avoid reading file2 multiple times. Instead, I would parse the valid code ids in advance and keep them in a lookup table.
#! /usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

# States of the line parser:
#   CODE  - looking for the next "Code id -N-" line
#   EVENT - a valid code was seen; looking for its "Event id -N-" line
use constant CODE  => 'CODE';
use constant EVENT => 'EVENT';

# Codes pre-parsed from file1
my %codes = (
    12131 => 1,
    34234 => 1,
    53435 => 1,
    46566 => 1,
    34522 => 1,
);

# The code id is captured without its surrounding dashes (it is used as a
# lookup key); the event id is captured with them, as shown in the output.
my $re_code  = qr{Code \s+ id \s+ -(\d+)- }x;
my $re_event = qr{Event \s+ id \s+ (-\d+-) }x;

# parse_events( $fh, \%valid_codes )
#
# Reads $fh line by line and pairs each valid code id with the first event
# id that follows it.
#
#   $fh          - readable filehandle
#   $valid_codes - hash ref; a code id counts as valid when its value is true
#
# Returns a hash ref mapping code id => event id.
#
# Every "Code id" line restarts the search, whatever the current state is.
# Otherwise a valid code that has no event of its own would silently pick up
# the event belonging to a later (possibly invalid) code.
sub parse_events {
    my ( $fh, $valid_codes ) = @_;

    my %event_for;
    my $code_id;
    my $state = CODE;

    LINE:
    while ( my $line = <$fh> ) {
        if ( $line =~ $re_code ) {
            my $candidate = $1;
            if ( $valid_codes->{$candidate} ) {
                $code_id = $candidate;
                $state   = EVENT;
            }
            else {
                # Invalid code: forget any pending code and skip its event.
                undef $code_id;
                $state = CODE;
            }
            next LINE;
        }

        if ( $state eq EVENT && $line =~ $re_event ) {
            $event_for{$code_id} = $1;
            $state = CODE;
        }
    }

    return \%event_for;
}

# Sort the keys so the dump is stable from run to run.
$Data::Dumper::Sortkeys = 1;

print Dumper( parse_events( \*DATA, \%codes ) );

1;

__DATA__
some content
some content
some content
some content
some content
blah. blah.
Code id -46566-
some content
some content
some content
some content
Event id -445778441211-
some content
some content
some content
some content
some content
some content
some content
Code id -12131-
some content
some content
some content
some content
some content
some content
some content
some content
some content
Event id -123443111131-
Code id -12342-
some content
some content
some content
some content
some content
some content
some content
some content
Event id -445987432141-
Output:
$VAR1 = { '12131' => '-123443111131-', '46566' => '-445778441211-' };
Update: removed the odd __DATA2__ tag from the end.
|
|---|