# Insert one row into `article` for every prepared concordance file.
#
# Row N takes its url, extracted text, concordance file and timestamp from
# index N of the parallel arrays below and is stored with id_article = N.
# All values are bound through placeholders, so quotes or backslashes in the
# data (URLs, extracted HTML text) can neither break nor alter the statement.
#
# NOTE(review): if this is MySQL and `id_article` is an AUTO_INCREMENT column,
# an explicit 0 is by default treated as "generate the next value" -- confirm
# against the table definition.
my $id_article = 0;
my $insert_article_sth = $dbh->prepare("
INSERT INTO `article`(`id_article`, `url`, `html_extr_text`,`concord_file`, `sys_time`)
VALUES (?, ?, ?, ?, ?)
") || die $dbh->errstr;
for my $i_article (0 .. $#output_concord_files_prepare) {
    $insert_article_sth->execute(
        $id_article,
        $url_prepare[$i_article],
        $html_pages_files_extended[$i_article],
        $output_concord_files_prepare[$i_article],
        $sys_time_prepare[$i_article],
    ) || die $insert_article_sth->errstr;

    # The id must advance once per inserted row; incrementing after the loop
    # would store every article under the same id.
    $id_article++;
}
####
# Insert one row into `event` for every entry of @event_prepare.
#
# Entry N is stored with id_event = N. The event text is bound through a
# placeholder, so quotes or backslashes in it can neither break nor alter the
# statement.
#
# NOTE(review): if this is MySQL and `id_event` is an AUTO_INCREMENT column,
# an explicit 0 is by default treated as "generate the next value" -- confirm
# against the table definition.
my $id_event = 0;
my $insert_event_sth = $dbh->prepare("
INSERT INTO `event`(`id_event`, `event`)
VALUES (?, ?)
") || die $dbh->errstr;
for my $i_event (0 .. $#event_prepare) {
    $insert_event_sth->execute($id_event, $event_prepare[$i_event])
        || die $insert_event_sth->errstr;

    # The id must advance once per inserted row; incrementing after the loop
    # would store every event under the same id.
    $id_event++;
}
####
# Create the link table between articles and events: one row per
# (id_article, id_event) pair, with both columns forming the primary key and
# each one referencing the id column of its parent table.
$create_query = qq{
create table article_event_index(
id_article int(10) NOT NULL,
id_event int(10) NOT NULL,
primary key (id_article, id_event),
foreign key (id_article) references article (id_article),
foreign key (id_event) references event (id_event)
)
};
# Stop with the driver's error message if the table cannot be created,
# instead of silently carrying on without it.
$dbh->do($create_query) || die $dbh->errstr;
####
#!/usr/bin/perl -w
use strict;
use locale;
use warnings;
#use diagnostics;
use utf8;
# Treat the three standard streams as UTF-8 text.
binmode(STDIN, "encoding(utf8)");
binmode(STDOUT, "encoding(utf8)");
binmode(STDERR, "encoding(utf8)");

# Directory with Unitex output files
my @output_concord_files = glob("output_concord/*.txt");

# The files must be processed in the numeric order of the <N> part of
# "concord.<N>.txt" (plain string ordering would put concord.10 before
# concord.2), so sort on that embedded value.
# The capture is taken through a list assignment rather than $1, so a name
# that does not fit the pattern gets an explicit key of 0 instead of an
# undefined or left-over capture.
@output_concord_files =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  {
        my ($sequence) = m{output_concord/concord\.(.*)\.txt};
        [ (defined($sequence) ? $sequence : 0), $_ ]
    } @output_concord_files;

# Every "=E-...=event" hit found in the concordance files is reported on
# STDOUT and recorded in the index file as "<file> -> <event>".
my $index_file = "index.txt";
open (my $index_fh, '>:utf8', $index_file) || die "Couldn't open $index_file : $!\n";

my $event;
foreach my $output_concord_file (@output_concord_files) {
    open (my $fh, '<:utf8', $output_concord_file) || die "Couldn't open $output_concord_file : $!\n";
    while (my $line = <$fh>) {
        # At most one event is taken per line: the text between "=E-" and the
        # last "=event" on that line (the capture is greedy).
        if ($line =~ /=E-(.*)=event/) {
            $event = $1;
            print "$output_concord_file -> $event\n";
            print {$index_fh} "$output_concord_file -> $event\n";
        }
    }
    close($fh);
}

# Buffered write errors only surface when the handle is closed, so check it.
close($index_fh) || die "Couldn't close $index_file : $!\n";