#!/usr/bin/perl -w
##############################################################################
#
# XML2PDF
#
# Author : HolyGrail
# Version: 0.7
#
# Usage: xml2pdf xmlfile {xmlfile}
#
# This conversion program translates XML into PDF. You can specify more than
# one xmlfile at the commandline and they will all be processed. The PDF files
# will replace ".xml" with ".pdf" or add ".pdf" if the sourcefile does not end
# with ".xml". It does no XML validation at all. The XML file should conform
# to the following DTD:
#
#   NOTE: the DTD text itself is missing from this copy of the file. Judging
#   only from the XPath queries and attribute lookups in the code below, the
#   expected structure is:
#
#     document            (attribute "name", used as the PDF title)
#       section1          (attribute "title"; each one starts a new page)
#         section2        (attribute "title")
#         p               (its text content is printed)
#         img             (attribute "src": local file or http(s) URL)
#
#   section2 and p elements are themselves scanned for nested section2, p
#   and img children.
#
# Known issues:
#  * 'img'-elements are processed after a complete 'p' element is processed,
#    so the images that are meant to come 'in' the text, come 'after' the
#    text. The workaround for this should be to split the text up in multiple
#    p-elements and put the img-element between two p-elements.
#  * Images that are wider than the page fall off the page on the right side
#
# Bugs:
#  * Lots of them, no doubt
#
# Changes:
#
# 0.7: 'img' tags can now also be URLs and no longer just local files
# 0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD
#      accordingly.
# 0.5: Changed the program call, so you can process multiple files in one call
# 0.4: It now can handle sections that don't fit on one page. It starts new
#      pages if needed.
# 0.3: Adapted it to conform to OeufMayo's DTD
# 0.2: Minor change (0.1 re-declared the global vars as 'my', thanks to davorg)
# 0.1: First release
#
##############################################################################

use strict;
use PDFLib;
use XML::XPath;
use LWP::Simple;

use vars qw (
    $TOPMARGIN
    $BOTTOMMARGIN
    $LEFTMARGIN
    $RIGHTMARGIN
    $PAGEHEIGHTPOINTS
    $PAGEWIDTHPOINTS
    $MAXYPOS
    %FONTDEFINITION
    %IMAGES
);

# Define the fonts for each element
%FONTDEFINITION = (
    section1 => [face => "Helvetica-Bold", size => "16.0"],
    section2 => [face => "Helvetica-Bold", size => "14.0"],
    p        => [face => "Helvetica",      size => "12.0"]
);

# Pagesetup
my $pagename = "a4";
$PAGEHEIGHTPOINTS = 842;    # See PDFLib's documentation for
$PAGEWIDTHPOINTS  = 595;    # the size of your page
($TOPMARGIN, $BOTTOMMARGIN, $LEFTMARGIN, $RIGHTMARGIN) = (10, 10, 15, 15);

# Convert every file named on the command line.
foreach my $xmlfile (@ARGV) {
    convert_file($xmlfile);
}

# pdf_filename_for($xmlfile)
#
# Returns the output filename for $xmlfile: a trailing ".xml" (any case) is
# replaced with ".pdf"; any other name gets ".pdf" appended. The name is
# derived from a copy, so the caller's value is never modified and an input
# that already ends in ".pdf" is not overwritten.
sub pdf_filename_for {
    my ($xmlfile) = @_;

    my $pdffile = $xmlfile;
    $pdffile .= '.pdf' unless $pdffile =~ s/\.xml$/.pdf/i;
    return $pdffile;
}

# image_filetype($src)
#
# Maps the ending of an image source name to the filetype string handed to
# load_image: 'jpeg', 'gif' or 'png'. Anything unrecognised falls back to
# 'gif', which is what the script has always used as its default.
sub image_filetype {
    my ($src) = @_;

    return 'png'  if $src =~ /png$/i;
    return 'gif'  if $src =~ /gif$/i;
    return 'jpeg' if $src =~ /jpe?g$/i;    # alternation, not a char class
    return 'gif';
}

# attribute_or_empty($node, $name)
#
# Returns the value of attribute $name on $node, or the empty string when
# the attribute is absent, so callers can concatenate it without warnings.
sub attribute_or_empty {
    my ($node, $name) = @_;

    my $value = $node->getAttribute($name);
    return defined $value ? $value : '';
}

# load_images($pdf, $xml)
#
# Pre-processes every 'img' element of the document: remote (http/https)
# sources are first downloaded into the current directory under their
# basename, then each distinct source is loaded into $pdf once and stored
# in %IMAGES keyed by the original 'src' value. Images that cannot be
# fetched are reported with a warning and left out of %IMAGES.
sub load_images {
    my ($pdf, $xml) = @_;

    # Image handles are loaded into one particular $pdf; never carry them
    # over from a previously converted file.
    %IMAGES = ();

    foreach my $img ($xml->findnodes('//img')) {
        my $src = $img->getAttribute('src');
        unless (defined $src && length $src) {
            warn "Skipping 'img' element without a 'src' attribute\n";
            next;
        }
        next if exists $IMAGES{$src};    # already loaded for this document

        my $filename = $src;
        if ($src =~ m{^https?://}i) {
            # Last path segment, which must look like "name.ext".
            my ($basename) = $src =~ m{([^/]+\.[^/]+)\z};
            unless (defined $basename) {
                warn "Cannot derive a local filename from '$src', skipping\n";
                next;
            }
            print $basename."\n";
            my $status = getstore($src, $basename);
            unless (is_success($status)) {
                warn "Could not fetch '$src' (HTTP status $status), skipping\n";
                next;
            }
            $filename = $basename;
        }

        $IMAGES{$src} = $pdf->load_image(
            filetype => image_filetype($src),
            filename => $filename
        );
    }
    return;
}

# convert_file($xmlfile)
#
# Converts one XML file into a PDF written next to it (see
# pdf_filename_for). Unreadable files and files without a 'document' root
# element are reported with a warning and skipped, so the remaining files
# on the command line are still processed.
sub convert_file {
    my ($xmlfile) = @_;

    unless (-r $xmlfile) {
        warn "Cannot read '$xmlfile', skipping\n";
        return;
    }

    # Documentsetup
    my $xml = XML::XPath->new(filename => $xmlfile);
    my ($document) = $xml->findnodes('document');
    unless ($document) {
        warn "'$xmlfile' has no 'document' root element, skipping\n";
        return;
    }

    my $pdffile = pdf_filename_for($xmlfile);
    my $pdf = PDFLib->new(
        filename  => $pdffile,
        papersize => $pagename,
        creator   => "XML2PDF",
        title     => attribute_or_empty($document, 'name')
    );
    print "Converting $xmlfile to $pdffile \n";

    # pre-process the images:
    load_images($pdf, $xml);

    # Process the document
    foreach my $section ($xml->findnodes('document/section1')) {
        # start every section on a new page
        $pdf->start_page();

        # Calculate the starting Y-axis value
        $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # Print the header
        printpdf($pdf, $FONTDEFINITION{section1},
                 "\n".attribute_or_empty($section, 'title')."\n\n",
                 $MAXYPOS);
        processsection($pdf, $section);
    }
    return;
}

# printpdf($pdf, $font, $what, $ypos)
#
# Prints the text $what in the given font (an array ref of set_font
# arguments), left-aligned between the left and right margins. The text box
# starts at the bottom margin and is $ypos high; when $ypos is omitted (or
# false) the current text Y position is used instead. Whatever print_boxed
# reports as not printed is continued on a new page with a full-height box.
sub printpdf {
    my ($pdf, $font, $what, $ypos) = @_;

    $pdf->set_font( @{$font} );

    my $charsnotprinted = length($what);
    my $on_fresh_page   = 0;
    while ($charsnotprinted) {
        my $length_before = length($what);
        $charsnotprinted = $pdf->print_boxed($what,
            mode => "left",
            x    => $LEFTMARGIN,
            y    => $BOTTOMMARGIN,
            w    => $PAGEWIDTHPOINTS - $RIGHTMARGIN - $LEFTMARGIN,
            h    => $ypos || $pdf->get_value("texty")
        );
        last unless $charsnotprinted;

        # Nothing fitted even on a page we just started: starting yet
        # another page cannot help, so give up instead of looping forever.
        if ($on_fresh_page && $charsnotprinted >= $length_before) {
            warn "Text does not fit on an empty page, dropping "
               . "$charsnotprinted characters\n";
            last;
        }

        # Keep only the tail that was not printed and continue on a new page.
        $what = substr($what, $length_before - $charsnotprinted);
        $pdf->start_page();
        $on_fresh_page = 1;
        $ypos = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;
    }
    return;
}

# processsection($pdf, $section)
#
# Walks the direct child elements of $section in document order:
#   section2 - prints its 'title' attribute as a header, then recurses
#   p        - prints its text content, then recurses (so that 'img'
#              children of the paragraph are placed after its text)
#   img      - places the pre-loaded image from %IMAGES at the left margin
#              below the current text position, starting a new page first
#              when the image would extend past the bottom of the page
# Other elements are ignored.
sub processsection {
    my ($pdf, $section) = @_;

    foreach my $node ($section->findnodes('*')) {
        my $name = $node->getName();

        if ($name eq "section2") {
            printpdf($pdf, $FONTDEFINITION{$name},
                     "\n".attribute_or_empty($node, 'title')."\n");
            processsection($pdf, $node);    # recurse!
        }
        elsif ($name eq "p") {
            printpdf($pdf, $FONTDEFINITION{$name},
                     "\n".$node->string_value()."\n");
            processsection($pdf, $node);    # recurse!
        }
        elsif ($name eq "img") {
            my $src   = $node->getAttribute('src');
            my $image = defined $src ? $IMAGES{$src} : undef;
            unless ($image) {
                # Pre-processing could not load this image; skip it rather
                # than die on a method call on an undefined value.
                warn "No image loaded for '"
                   . (defined $src ? $src : '') . "', skipping\n";
                next;
            }

            my $height = $image->height();
            if ($pdf->get_value('texty') - $height < 0) {
                $pdf->start_page();
                $pdf->set_text_pos($pdf->get_value('textx'),
                                   $PAGEHEIGHTPOINTS - $TOPMARGIN);
            }
            $pdf->add_image(img => $image,
                            x   => $LEFTMARGIN,
                            y   => $pdf->get_value('texty') - $height);
            # Move the text position below the image just placed.
            $pdf->set_text_pos($pdf->get_value('textx'),
                               $pdf->get_value('texty') - $height);
        }
    }
    return;
}