# ...

# Pass 1: visit office:automatic-styles so the style collectors can run.
# The document text is not written back to $content after this pass; it
# only builds some hashes.
my $xml = XML::Twig->new(
    twig_roots    => { 'office:automatic-styles' => 1 },
    twig_handlers => {
        'style:style' => \&handler_paragraph_style_collector,
        'style:style[@style:family="text"]/style:text-properties' =>
            \&handler_style_collector,
    },
    output_encoding => 'UTF-8',
    pretty_print    => 'nsgmls',    # nsgmls for parsability
);
$xml->parse($content);
$xml->dispose;

# Pass 2: process office:body and store the serialized result back into
# $content.  The link anchors (text:bookmark) are handled in this pass
# because they must be done before the internal links (text:a), which are
# only processed in the following pass.
$xml = XML::Twig->new(
    twig_roots    => { 'office:body' => 1 },
    twig_handlers => {
        '*[text:bookmark]' => \&handler_bookmark,
        'text:note[@text:note-class="footnote"]/text:note-body' =>
            \&handler_footnotes,
        'text:note-citation'      => \&handler_citation,     # only some kinds
        'text:span'               => \&handler_span,         # typographic markup
        'text:list-item'          => \&handler_list_item,    # all lists become unordered
        'table:table-header-rows' => \&handler_table_header_rows,
        'table:table-row'         => \&handler_table_row,
        'table:table'             => \&handler_table,        # primitive table support
        'text:line-break'         => \&handler_line_break,

        # These elements are simply deleted.  Inside the generated sub, $_
        # is whatever it holds when the handler is called (the handlers in
        # this file use it as the current element), not the map variable.
        (
            map { ( $_ => sub { $_->delete } ) } qw(
                text:table-of-content
                text:index-body
                text:alphabetical-index
            )
        ),
    },
    output_encoding => 'UTF-8',
    pretty_print    => 'nsgmls',    # nsgmls for parsability
);
$xml->parse($content);
$content = $xml->sprint;
$xml->dispose;

# Pass 3: process office:body again, on the output of pass 2, and store
# the serialized result back into $content.  Links (text:a) are handled
# here, after their targets (text:bookmark) were handled in pass 2.
$xml = XML::Twig->new(
    twig_roots    => { 'office:body' => 1 },
    twig_handlers => {
        'text:a'     => \&handler_links,
        'text:h'     => \&handler_h,
        'text:p'     => \&handler_p,
        'draw:frame' => \&handler_draw_frame,

        # Elements that are deleted outright ($_ is read at call time,
        # see the note in the previous pass).
        (
            map { ( $_ => sub { $_->delete } ) } qw(
                office:annotation
                office:annotation-end
                text:sequence-decls
                text:tracked-changes
                text:table-of-content
                office:forms
            )
        ),

        # Elements that all share the lift-up handler.
        (
            map { ( $_ => \&handler_lift_up ) } qw(
                text:list
                text:section
                office:body
                office:text
            )
        ),
    },
    output_encoding => 'UTF-8',
    empty_tags      => 'html',
    pretty_print    => 'nsgmls',
);
$xml->parse($content);
$content = $xml->sprint;
$xml->dispose;

# . . .