#!/usr/bin/perl -w
#
# Scan every Unitex concordance output file under output_concord/ and record,
# for each line containing an "=E-<event>=event" marker, the file name and the
# captured event text. Each hit is printed to STDOUT and appended to index.txt
# in the form "<file> -> <event>".
use strict;
use locale;
use warnings;
#use diagnostics;
use utf8;

# Encode/decode the standard streams as UTF-8.
binmode(STDIN,  ":encoding(utf8)");
binmode(STDOUT, ":encoding(utf8)");
binmode(STDERR, ":encoding(utf8)");

# Directory with Unitex output files.
# glob() hands the names back in lexical order, which would put
# concord.10.txt before concord.2.txt, so order them by their numeric index.
# The index is taken from the match result directly (never from a stale $1):
# files whose name does not fit concord.<number>.txt are still processed, but
# after the numbered ones, in name order.
my (@numbered_files, @other_files);
foreach my $file (glob("output_concord/*.txt")) {
    if ($file =~ m{output_concord/concord\.(\d+)\.txt\z}) {
        push @numbered_files, [$1, $file];
    }
    else {
        push @other_files, $file;
    }
}
my @output_concord_files = (
    (map  { $_->[1] }
     sort { $a->[0] <=> $b->[0] or $a->[1] cmp $b->[1] }
     @numbered_files),
    (sort @other_files),
);

my $index_file = "index.txt";
open(my $index_fh, '>:utf8', $index_file)
    || die "Couldn't open $index_file : $!\n";

foreach my $output_concord_file (@output_concord_files) {
    open(my $fh, '<:utf8', $output_concord_file)
        || die "Couldn't open $output_concord_file : $!\n";

    while (my $line = <$fh>) {
        # Greedy capture on purpose (unchanged): with several markers on one
        # line, everything between the first "=E-" and the last "=event" is
        # reported as a single event.
        if ($line =~ /=E-(.*)=event/) {
            my $event = $1;
            print "$output_concord_file -> $event\n";
            print {$index_fh} "$output_concord_file -> $event\n";
        }
    }

    close($fh);
}

# Buffered write errors only surface at close time, so check it.
close($index_fh) || die "Couldn't close $index_file : $!\n";