Re^2: Relational table with perl DBI

You are right, all the arrays are inserted in one go. It looks like this :

sub fill_Tables{
    #Connecting to the database

#############################################################
    $dsn = "DBI:mysql:database=$database;host=$hostname";
    $dbh = DBI->connect($dsn, $login, $password) || die "Couldn't conn
+ect to $database : $!\n";

    #Filling tables
    #############################################################
    #Filling table 'article'
    #############################################################
    #Gathering data from daily output for insertion
    #############################################################

    #Directory with Unitex output files
    my @output_concord_files = glob("output_concord/*.txt");

    #Using 'glob' implies random order of files => sort 
    @output_concord_files = map{$_->[1]} sort{$a->[0] <=> $b->[0]} map
+{/output_concord\/concord\.(.*)\.txt/; [$1, $_]} @output_concord_file
+s;

    #Declaring variable for data extraction 
    my ($output_concord_file, $url, $sys_time, $event);
    my (@output_concord_files_prepare, @url_prepare, @sys_time_prepare
+, @event_prepare, @index);
    #Opening, reading, and extracting column content of each concord.n
+.txt file
    foreach $output_concord_file(@output_concord_files){
        #Note : for the concord file, no processing implied -> stored 
+by default in $output_concord_file
        open (my $fh, '<:utf8', "$output_concord_file") || die "Couldn
+'t open $output_concord_file : $!\n";
        #Populating @output_concord_files_prepare array for column 'co
+ncord_file' insertion
        push @output_concord_files_prepare, $output_concord_file;
        while (<$fh>){
            if ($_ =~ /=\[=(.*)=\]=/){
                $url = $1;
                #Populating @url_prepare array for column 'url'  from 
+'article' table insertion
                push @url_prepare, $url;
            }
            if ($_ =~ /=\[\+(.*)\+\]=/){
                $sys_time = $1;
                #Populating @sys_time_prepare array for column 'sys_ti
+me' from 'article' table insertion
                push @sys_time_prepare, $sys_time;
            }
            if ($_ =~ /=E-(.*)=event/){
                $event = $1;
                #Populating @event_duplicates array for column 'event'
+ from 'event' table insertion
                push @event_prepare, $event;
                #print "$output_concord_file -> $event\n";
                push @index, $output_concord_file, $event;
            }
        }
    }
    #Input files for extraction of column : html_extr_text
    my $dir_html_pages = 'html_pages';

    #Opening directory for readdir method, extracting only '.txt' file
+s -> cohabitation with '.html' files
    opendir (DIRHTMLPAGES, $dir_html_pages) || die "Coudln't open $dir
+_html_pages : $!";
    #Extracting only the '.txt' format files
    my @html_pages_files = map{s/\.[^.]+$//;$_}grep{/\.txt$/} readdir 
+DIRHTMLPAGES;
    closedir (DIRHTMLPAGES);

    #Mapping and sorting the output following the readdir method patte
+rn the "html.n.txt" file number, where n = 1 .. n
    @html_pages_files = map{$_->[1]} sort{$a->[0] <=> $b->[0]} map{/ht
+ml\.(.*)/; [$1, $_]} @html_pages_files;

    #Adding path 'html_pages/' and extension '.txt' of each file
    my @html_pages_files_extended;
    my $path = "html_pages\/";
    my $extension = "\.txt ";
    my ($html_page_file, $extended);
    foreach $html_page_file(@html_pages_files){
        $extended = $path . $html_page_file . $extension;
        #Populating @html_pages_files_extended for column html_extr_te
+xt insertion
        push @html_pages_files_extended, $extended;
    }

    #print "@output_concord_files_prepare\n";
    #print "@url_prepare\n";
    #print "@sys_time_prepare\n";
    #print "@event_prepare\n";
    #print "@html_pages_files_extended\n";

    #Insertion of extracted and synchronized data in 'article' table o
+f e_slide database
    my $i_article;
    my $id_article = 0;
    my @article_index;
    for ($i_article = 0; $i_article < @output_concord_files_prepare; $
+i_article++){
        #$dbh->do("
        #    INSERT INTO `article`(`id_article`, `url`, `html_extr_tex
+t`,`concord_file`, `sys_time`)
        #    VALUES ('$id_article', '$url_prepare[$i_article]', '$html
+_pages_files_extended[$i_article]', '$output_concord_files_prepare[$i
+_article]', '$sys_time_prepare[$i_article]')
        #") || die $dbh->errstr;
        push @article_index, $i_article;
    }
    #print "@article_event_index\n";
    #print "@article_index\n";
    $id_article++;    

    #Insertion of extracted and synchonized data in 'event' table of e
+_slide database
    my $i_event;
    my $id_event = 0;
    my @event_index;
    for ($i_event = 0; $i_event < @event_prepare; $i_event++){
        #$dbh->do("
        #    INSERT INTO `event`(`id_event`, `event`)
        #    VALUES ('$id_event', '$event_prepare[$i_event]')
        #") || die $dbh->errstr;
        push @event_index, $i_event;
    }
    #print "@event_index\n";
    $id_event++;

    my $index_concord;
    my @index_concord;
    foreach my $index(@index){
        if ($index =~ /output_concord\/concord\.(.*)\.txt(.*)/){
            $index_concord = $1;
            push @index_concord, $index_concord;
        }
    }
    #print "@index_concord\n";

    #Insertion of extracted and sychronized indexes in 'article_event_
+index'
    my $i_article_event_index;
    for ($i_article_event_index; $i_article_event_index < @event_index
+; $i_article_event_index++){
        $dbh->do("
            INSERT INTO `article_event_index`(`id_article`, `id_event`
+)
            VALUES ('$index_concord[$i_article_event_index]', '$event_
+index[$i_article_event_index]')
        ") || die $dbh->errstr;
    }
}
[download]

As you can see it's pretty complicated to get the neccessary data for each array that I'm going to use to populate each column of each table.

The last table that I fill is the index one, and I get and error like this : "DBD::mysql::db do failed: Cannot add or update a child row: a foreign key constraint fails (`e_slide`.`article_event_index`, CONSTRAINT `article_event_index_ibfk_1` FOREIGN KEY (`id_article`) REFERENCES `article` (`id_article`)) at db2.pl line 262. Cannot add or update a child row: a foreign key constraint fails (`e_slide`.`article_event_index`, CONSTRAINT `article_event_index_ibfk_1` FOREIGN KEY (`id_article`) REFERENCES `article` (`id_article`)) at db2.pl line 262."

By using "LAST_ID" and the autoincrement option of MySQL will I get all the id of the two tables or only the last one of each ?

Thanks again!

Comment on Re^2: Relational table with perl DBI Download Code

Replies are listed 'Best First'.
Re^3: Relational table with perl DBI by M15U (Acolyte) on Mar 13, 2013 at 10:13 UTC
I tried using 'last_insert_id' and I get the id from my tables. The code looks like this : #Insertion of extracted and synchronized data in 'article' table of e_ +slide database my $i_article; my $id_article; for ($i_article = 0; $i_article < @output_concord_files_prepare; $ +i_article++){ $dbh->do(" INSERT INTO `article`(`url`, `html_extr_text`,`concord_fil +e`, `sys_time`) VALUES ('$url_prepare[$i_article]', '$html_pages_files_ext +ended[$i_article]', '$output_concord_files_prepare[$i_article]', '$sy +s_time_prepare[$i_article]') ") \|\| die $dbh->errstr; $id_article = $dbh->last_insert_id(undef, undef, 'article', 'i +d_article'); } #Insertion of extracted and synchonized data in 'event' table of e +_slide database my $i_event; my $id_event; for ($i_event = 0; $i_event < @event_prepare; $i_event++){ $dbh->do(" INSERT INTO `event`(`event`) VALUES ('$event_prepare[$i_event]') ") \|\| die $dbh->errstr; $id_event = $dbh->last_insert_id(undef, undef, 'event', 'id_ev +ent'); } [download] So now, how do I get the one-to-many relationship in the third table? Because in this case one article contains multiple events. So the it would look like : 1 - 1, 1 - 2, 2 - 3, 2 - 4, 2 - 5 and so on. I also manage to have a "good practice" code for the insertion using the code that you provided : my @fields = (qw(url html_extr_text concord_file sys_time)); my $fieldlist = join ", ", @fields; my $field_placeholders = join ", ", map {'?'} @fields; my $insert_query = qq{ INSERT INTO article($fieldlist) VALUES ($field_placeholders) }; my $sth = $dbh->prepare($insert_query); foreach my $article_index (0 .. @output_concord_files_prepare){ $field_placeholders = $sth->execute($url_prepare[$article_inde +x], $html_pages_files_extended[$article_index], $output_concord_files +_prepare[$article_index], $sys_time_prepare[$article_index]); if ($field_placeholders != 1){ die "Error inserting records, only [$field_placeholders] g +ot inserted: " . $sth->insert->errstr; } } [download] But still I don't know how to make the one-to-manu relationship in perl.	[reply] [d/l] [select]
Re^4: Relational table with perl DBI by Neighbour (Friar) on Mar 13, 2013 at 12:30 UTC
First off, very nice how you improved on the insert query, for example when building the `$field_placeholders`. In order to get the relations properly, you'll learn about another nifty feature of perl: hashes. A hash is an unsorted list of key-value pairs. We're going to loop through the files, accumulating the data per file in `$hr_output` and accumulating all per-file data in `$ar_data`. The prefixes hr_ and ar_ are not required by perl, but they help me to distinguish between references to hashes (hashref, or hr) and references to arrays (arrayref, or ar). my $ar_data = []; #Opening, reading, and extracting column content of each concord.n.txt + file foreach my $output_concord_file (@output_concord_files){ #Note : for the concord file, no processing implied -> stored by d +efault in $output_concord_file open (my $fh, '<:utf8', "$output_concord_file") \|\| die "Couldn't o +pen $output_concord_file : $!"; my $hr_output = {}; $hr_output->{concord_file} = $output_concord_file; while (<$fh>){ if ($_ =~ /=\[=(.)=\]=/){ $hr_output->{url} = $1; } if ($_ =~ /=\[\+(.)\+\]=/){ $hr_output->{sys_time} = $1; } if ($_ =~ /=E-(.*)=event/){ push(@{$hr_output->{events}}, $1); } } push(@{$ar_data}, $hr_output); close ($fh); } [download] I'm not quite sure what you intended to do to those html-pages, and i'm not sure that what's happening is what you really want, so I'm just going to skip that :P Next up is inserting all accumulated data: if (@{$ar_data}) { # If there is any data, prepare the query my @fields = keys %{$ar_data->[0]}; # Extract all the keys from th +e first element of ar_data my $fieldlist = join(',', @fields); my $field_placeholders = join(',', map {'?'} @fields); my $sth_insert_article = $dbh->prepare( qq( INSERT INTO article ($fieldlist) VALUES ($field_placeholders) )); my $sth_select_last_insert_id = $dbh->prepare( qq( SELECT LAST_INS +ERT_ID )); my $sth_insert_event = $dbh->prepare( qq( INSERT INTO event (event) VALUES (?) )); my $sth_insert_article_event = $dbh->prepare( qq( INSERT INTO article_event_index (id_article, id_event) VALUES +(?,?) )); # Now we loop over and insert all data foreach my $hr_output (@{$ar_data}) { # Insert article data my $inserted_records = $sth_insert_article->execute(@{$hr_outp +ut}{@fields}); # Ok, I admit, this one is really tricky to read :) if ($inserted_records != 1) { die "Error inserting article [$h +r_output->{url}], only [$inserted_records] got inserted: " . $sth_ins +ert_article->errstr; } # Get article ID my $article_id = ($dbh->selectcol_arrayref($sth_select_last_in +sert_id))->[0]; # Insert events foreach my $event (@{$hr_output->{events}}) { $inserted_records = $sth_insert_event->execute($event); if ($inserted_records != 1) { die "Error inserting event [ +$event], only [$inserted_records] got inserted: " . $sth_insert_event +->errstr; } # Get event ID my $event_id = ($dbh->selectcol_arrayref($sth_select_last_ +insert_id))->[0]; # Insert article_event combo $inserted_records = $sth_insert_article_event->execute($ar +ticle_id, $event_id); if ($inserted_records != 1) { die "Error inserting article +-event [$article_id, $event_id], only [$inserted_records] got inserte +d: " . $sth_insert_article_event->errstr; } } } } [download] This follows the loop I described in my previous post. There are a few tricks here, which I'll leave for you to figure out (you can still ask though :)). The most unreadable trick is `@{$hr_output}{@fields}` which produces an array of the value-parts of the hashref for all @fields, in order of those @fields.	[reply] [d/l] [select]
Re^5: Relational table with perl DBI by M15U (Acolyte) on Mar 13, 2013 at 13:55 UTC
Thank you very much for your answer. I though that at some point I'll have to use hashes, but I was always afraid of them. I understand your code and I tried to incorporate it in my own. I've already seen on the web the @{$hr_output}{@fields} syntax, though in this context it's the only time when I get it. However I have a small program in the code. I get : "DBD::mysql::st execute failed: Unknown column 'event' in 'field list' at db2.pl line 289. DBD::mysql::db selectcol_arrayref failed: Unknown column 'LAST_INSERT_ID' in 'field list' at db2.pl line 292. Can't use an undefined value as an ARRAY reference at db2.pl line 292. " I don't really see where do the script bug. And also the first "if" error managment loop gives me : "Use of uninitialized value $inserted_records in numeric ne (!=) at db2.pl line 290. Use of uninitialized value $inserted_records in concatenation (.) or string at db2.pl line 290. Error inserting article http://rss.feedsportal.com/c/32788/f/524037/s/29456ceb/l/0Llci0Btf10Bfr0Cfrance0Cfaits0Edivers0Chautes0Epyrenees0Eemportes0Epar0Eune0Eavalanche0Eils0Es0Een0Esortent0Epresque0E78664640Bhtml/story01.htm, only [] got inserted: Unknown column 'event' in 'field list' at db2.pl line 290." Though thank you very much again.	[reply]
Re^6: Relational table with perl DBI by M15U (Acolyte) on Mar 13, 2013 at 14:06 UTC
Re^7: Relational table with perl DBI by M15U (Acolyte) on Mar 13, 2013 at 14:46 UTC
Some notes below your chosen depth have not been shown here
Re^3: Relational table with perl DBI by poj (Abbot) on Mar 14, 2013 at 10:43 UTC
It looks to me like this code has the potential to give you the same event more than once in the event table each with a different id. `if ($_ =~ /=E-(.)=event/){ $event = $1; push @event_prepare, $event; }` [download] It would be much simpler to forget using numerical keys and just use the $event itself as the primary key. Use a hash to eliminate duplicates like this ; `if ($_ =~ /=E-(.)=event/){ $event_prepare{$1} = 1; }` [download] With regard to the article table, I would use the n value from the filename outputcondord.n.txt as the primary key thus avoiding sorting and synchronising problems as well as making the data in the table more human readable. Your article_event_index would then just need to contain the n value from the filename and the text from the events in that file. poj	[reply] [d/l] [select]