in reply to Re^3: create an xml file from column file
in thread create an xml file from column file

If my input was this:

how o B-NP are o I-NP you o I-NP some o o really o B-GP
and I want the output to be <key="type">NP</key><text>how are you</text><key="type">GP</key><text>really</text>. That is, I want the whole text from B-NP through the last I-NP to appear between the opening and closing text tags.

Replies are listed 'Best First'.
Re^5: create an xml file from column file
by hdb (Monsignor) on Jul 25, 2013 at 07:58 UTC

    Another change in requirements? Wow. Full rewrite needed for this one.

    use strict;
    use warnings;
    use Text::CSV;

    # Group IOB-tagged words from words.txt into one
    #   <key="type">TYPE</key><text>WORDS</text>
    # line per chunk, printed to STDOUT.
    #
    # The input is assumed to be tab separated, with the word in the first
    # column and the chunk tag in the last column.  "B-XX" opens a chunk of
    # type XX, "I-XX" continues it, anything else (e.g. "o") is outside any
    # chunk.  The following shows what's in words.txt; it is NOT words.txt
    # itself!
    #
    #   how     o   B-NP
    #   are     o   I-NP
    #   you     o   I-NP
    #   some    o   o
    #   really  o   B-GP

    my $csv = Text::CSV->new( { sep_char => "\t" } )    # assuming tab separated input
        or die "Cannot create CSV parser: " . Text::CSV->error_diag() . "\n";

    open my $words, "<", "words.txt" or die "Cannot open words.txt: $!\n";

    my $lasttype = '';    # type of the run being collected; '' means "outside any chunk"
    my @text;             # words of the run being collected

    # Print the run collected so far and start a new, empty one.
    # Runs without a type (the "o" rows) are dropped silently.
    my $flush = sub {
        print '<key="type">' . "$lasttype</key><text>@text</text>\n"
            if $lasttype ne '';
        @text = ();
    };

    while ( my $row = $csv->getline($words) ) {
        my $text = $row->[0];

        # Only look for a tag when there really is a separate tag column,
        # otherwise a hyphenated word such as "e-mail" would pass for one.
        my $tag = @{$row} > 1 ? $row->[-1] : '';

        my ( $prefix, $type ) = ( '', '' );
        ( $prefix, $type ) = ( $1, $2 ) if defined $tag and $tag =~ /(\w)-(\w+)/;

        # A new run starts when the type changes, and also on every "B-" tag:
        # two adjacent chunks of the same type must not be merged into one.
        if ( $prefix eq 'B' or $type ne $lasttype ) {
            $flush->();
            $lasttype = $type;
        }
        push @text, $text;
    }

    # getline returns false both at end of file and on a parse error;
    # report the latter instead of silently truncating the output.
    $csv->eof or $csv->error_diag();

    # print what's left over when all input read
    $flush->();

    close $words;

      Is it possible to do this without CSV? The code I wrote is below, but it does not produce the required output.

      use strict;
      use warnings;

      # Build the sentence and the chunk annotations for IOB-tagged input.
      #
      # Each line is whitespace separated: the word is the first field and
      # the chunk tag is the third field.  "B-XX" opens a chunk of type XX
      # and following "I-XX" lines extend it; any other tag closes the open
      # chunk.  Blank lines are ignored.
      #
      # Takes the input lines and returns a two-element list:
      #   - all words joined by single spaces, and
      #   - the concatenated <annotation> blocks, one per chunk.
      sub annotate_lines {
          my @lines = @_;

          my @sentence;    # every word, in input order
          my @chunks;      # one entry per chunk: [ type, word, word, ... ]
          my $open;        # the chunk still being extended, if any

          for my $line (@lines) {
              my @fields = split ' ', $line;
              next if !@fields;

              my $word = $fields[0];
              my $tag  = $fields[2] // '';
              push @sentence, $word;

              if ( $tag =~ /\AB-(\S*)/ ) {
                  $open = [ $1, $word ];
                  push @chunks, $open;
              }
              elsif ( $open and $tag =~ /\AI-(\S*)/ and $1 eq $open->[0] ) {
                  # continuation of the chunk opened by the last B- tag
                  push @{$open}, $word;
              }
              else {
                  undef $open;
              }
          }

          my $annotations = join '', map {
              my ( $type, @words ) = @{$_};
              "<annotation>\n<key=type>$type</key>\n<text>@words</text>\n</annotation>\n"
          } @chunks;

          return ( join( ' ', @sentence ), $annotations );
      }

      # Script entry point: read ARGV/STDIN, print the sentence line followed
      # by the annotation blocks.
      sub main {
          my @lines = <>;
          my ( $sentence, $annotations ) = annotate_lines(@lines);

          print "<text>$sentence</text>\n";

          # The single leading space is kept for compatibility with the
          # output layout of the earlier version of this script.
          print " $annotations\n";
          return;
      }

      main() unless caller;

        At your own risk, splitting on tab:

        use strict;
        use warnings;

        # Turn IOB-tagged, tab-separated rows into one output line per chunk.
        #
        # Every input line holds the word in its first column and the chunk
        # tag in its last column.  "B-XX" opens a chunk of type XX, "I-XX"
        # continues it, anything else (e.g. "o") lies outside any chunk and
        # is not reported.
        #
        # Takes an open filehandle and returns the list of output lines, each
        # of the form <key="type">TYPE</key><text>WORDS</text> followed by "\n".
        sub chunks_from_handle {
            my ($fh) = @_;

            my @output;
            my $lasttype = '';    # '' means "outside any chunk"
            my @text;

            # Emit the run collected so far and start a new, empty one.
            my $flush = sub {
                push @output, '<key="type">' . "$lasttype</key><text>@text</text>\n"
                    if $lasttype ne '';
                @text = ();
            };

            while ( my $line = <$fh> ) {
                chomp $line;
                my @row = split /\t/, $line;

                # Blank lines and one-column lines have no tag column; do not
                # mistake the word itself (or undef) for a tag.
                my $tag = @row > 1 ? $row[-1] : '';

                my ( $prefix, $type ) = ( '', '' );
                ( $prefix, $type ) = ( $1, $2 ) if $tag =~ /(\w)-(\w+)/;

                # A new run starts when the type changes, and also on every
                # "B-" tag so adjacent chunks of the same type stay separate.
                if ( $prefix eq 'B' or $type ne $lasttype ) {
                    $flush->();
                    $lasttype = $type;
                }
                push @text, $row[0] // '';
            }

            # emit what's left over when all input is read
            $flush->();

            return @output;
        }

        # Script entry point: convert words.txt and print the result.
        sub main {
            open my $words, "<", "words.txt" or die "Cannot open words.txt: $!\n";
            print chunks_from_handle($words);
            close $words;
            return;
        }

        main() unless caller;