# ---------------------------------------------------------------------------
# Populate the `article` table: one row per entry of
# @output_concord_files_prepare, taking the other columns from the parallel
# arrays @url_prepare, @html_pages_files_extended and @sys_time_prepare.
#
# Values are bound through placeholders instead of being interpolated into
# the SQL text, so quotes or other SQL metacharacters in a URL or in the
# extracted text can neither break nor inject into the statement.
# Note: with placeholders an undef element is stored as NULL rather than as
# an empty string.
# ---------------------------------------------------------------------------
my $insert_article = $dbh->prepare(
    " INSERT INTO `article`(`id_article`, `url`, `html_extr_text`,`concord_file`, `sys_time`)"
  . " VALUES (?, ?, ?, ?, ?) "
) or die $dbh->errstr;

my $id_article = 0;
for my $i_article (0 .. $#output_concord_files_prepare) {
    $insert_article->execute(
        $id_article,
        $url_prepare[$i_article],
        $html_pages_files_extended[$i_article],
        $output_concord_files_prepare[$i_article],
        $sys_time_prepare[$i_article],
    ) or die $insert_article->errstr;

    # The id must advance once per inserted row; incrementing it after the
    # loop would give every article the id 0.
    $id_article++;
}

####

# ---------------------------------------------------------------------------
# Populate the `event` table: one row per entry of @event_prepare, again
# using bound parameters and a per-row id.
# ---------------------------------------------------------------------------
my $insert_event = $dbh->prepare(
    " INSERT INTO `event`(`id_event`, `event`) VALUES (?, ?) "
) or die $dbh->errstr;

my $id_event = 0;
for my $i_event (0 .. $#event_prepare) {
    $insert_event->execute($id_event, $event_prepare[$i_event])
        or die $insert_event->errstr;

    # Same as for articles: one distinct id per inserted event.
    $id_event++;
}

####

# ---------------------------------------------------------------------------
# Create the article <-> event link table. Its composite primary key is
# (id_article, id_event) and both columns are foreign keys into the tables
# filled above.
# ---------------------------------------------------------------------------
$create_query = qq{
    create table article_event_index(
        id_article int(10) NOT NULL,
        id_event int(10) NOT NULL,
        primary key (id_article, id_event),
        foreign key (id_article) references article (id_article),
        foreign key (id_event) references event (id_event)
    )
};

# Report a failed CREATE instead of ignoring it silently. This only warns so
# that the control flow of the original (which carried on regardless) is kept.
$dbh->do($create_query)
    or warn "Couldn't create table article_event_index : " . $dbh->errstr . "\n";

####

# NOTE(review): the shebang and pragmas below suggest that everything from
# here on was originally a separate script -- confirm before merging files.
#!/usr/bin/perl -w
use strict;
use locale;
use warnings;
#use diagnostics;
use utf8;

binmode(STDIN, "encoding(utf8)");
binmode(STDOUT, "encoding(utf8)");
binmode(STDERR, "encoding(utf8)");

# Directory with Unitex output files
my @output_concord_files = glob("output_concord/*.txt");

# glob() does not return the files in numeric order of the sequence number
# embedded in "concord.<n>.txt", so sort on that number (Schwartzian
# transform: decorate with the key, sort numerically, undecorate).
@output_concord_files =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  {
        # Capture in list context so a file name that does not match yields
        # undef instead of a stale $1 left over from a previous match.
        my ($sequence) = /output_concord\/concord\.(.*)\.txt/;
        [ defined $sequence ? $sequence : 0, $_ ]
    } @output_concord_files;

# The index file receives one "<concord file> -> <event>" line per match.
my $index_file = "index.txt";
open (INDEX, '>:utf8', $index_file) || die "Couldn't open $index_file : $!\n";

my $event;
foreach my $output_concord_file (@output_concord_files) {
    open (my $fh, '<:utf8', $output_concord_file)
        || die "Couldn't open $output_concord_file : $!\n";

    while (my $line = <$fh>) {
        # An event is whatever stands between "=E-" and "=event" on a line
        # (greedy match, as in the original pattern).
        if ($line =~ /=E-(.*)=event/) {
            $event = $1;
            print "$output_concord_file -> $event\n";
            print INDEX "$output_concord_file -> $event\n";
        }
    }

    close($fh);
}

# NOTE(review): INDEX is deliberately left open here, as in the original, in
# case later code still writes to it; close it (and check the result) once
# nothing else does.