#!/usr/bin/perl -w
##############################################################################
#
# XML2PDF
#
# Author : HolyGrail
# Version: 0.7
#
# Usage: xml2pdf {<xmlfile>}
#
# This conversion program translates XML into PDF. You can specify more than
# one xmlfile at the command line and they will all be processed. The PDF
# files will replace ".xml" with ".pdf" or add ".pdf" if the source file does
# not end with ".xml". It does no XML validation at all. The XML file should
# conform to the following DTD:
#
#   <!ELEMENT document (section1)+>
#   <!ATTLIST document
#      title CDATA #REQUIRED
#   >
#
#   <!ELEMENT section1 (section2 | p | img)+>
#   <!ATTLIST section1
#      title CDATA #REQUIRED
#   >
#
#   <!ELEMENT section2 (p | img)+>
#   <!ATTLIST section2
#      title CDATA #REQUIRED
#   >
#
#   <!ELEMENT p (img | #PCDATA)>
#
#   <!ELEMENT img>
#   <!ATTLIST img
#      src CDATA #REQUIRED
#   >
#
# Known issues:
#   * 'img' elements are processed after a complete 'p' element is processed,
#     so the images that are meant to come 'in' the text, come 'after' the
#     text. The workaround for this is to split the text up into multiple
#     p-elements and put the img-element between two p-elements.
#   * Images that are wider than the page fall off the page on the right side
#
# Bugs:
#   * Lots of them, no doubt
#
# Changes:
#
#   0.7: 'img' tags can now also be URLs and no longer just local files
#   0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD
#        accordingly.
#   0.5: Changed the program call, so you can process multiple files in one call
#   0.4: It now can handle sections that don't fit on one page. It starts new
#        pages if needed.
#   0.3: Adapted it to conform to OeufMayo's DTD
#   0.2: Minor change (0.1 re-declared the global vars as 'my', thanks to davorg)
#   0.1: First release
#
##############################################################################
use strict;
use warnings;
use PDFLib;
use XML::XPath;
use LWP::Simple;

# Package globals shared between the main loop, printpdf and processsection.
our ( $TOPMARGIN,
      $BOTTOMMARGIN,
      $LEFTMARGIN,
      $RIGHTMARGIN,
      $PAGEHEIGHTPOINTS,
      $PAGEWIDTHPOINTS,
      $MAXYPOS,
      %FONTDEFINITION,
      %IMAGES,
    );

# Define the fonts for each element
%FONTDEFINITION = ( section1 => [face => "Helvetica-Bold", size => "16.0"],
                    section2 => [face => "Helvetica-Bold", size => "14.0"],
                    p        => [face => "Helvetica",      size => "12.0"]
                  );

# Pagesetup
my $pagename = "a4";
$PAGEHEIGHTPOINTS = 842;          # See PDFLib's documentation for
$PAGEWIDTHPOINTS  = 595;          # the size of your page
($TOPMARGIN, $BOTTOMMARGIN, $LEFTMARGIN, $RIGHTMARGIN) = (10, 10, 15, 15);

# Convert every file named on the command line into its own PDF.
foreach my $xmlfile (@ARGV)
{
    # Documentsetup
    my $xml = XML::XPath->new(filename => $xmlfile);

    # Replace a trailing ".xml" with ".pdf", otherwise append ".pdf". The
    # name is built on a copy so @ARGV stays untouched and an input that
    # already ends in ".pdf" is never chosen as its own output file.
    my $pdffile = $xmlfile;
    $pdffile .= ".pdf" unless $pdffile =~ s/\.xml$/.pdf/i;

    my ($document) = $xml->findnodes('document');
    unless ($document)
    {
        warn "xml2pdf: $xmlfile has no 'document' root element, skipping\n";
        next;
    }

    # The DTD in the header declares the attribute as 'title'; 'name' is
    # still accepted as a fallback because that is what earlier versions of
    # this script read.
    my $title = $document->getAttribute('title');
    $title = $document->getAttribute('name')
        unless defined $title && length $title;
    $title = '' unless defined $title;

    my $pdf = PDFLib->new( filename => $pdffile,
                           papersize=> $pagename,
                           creator  => "XML2PDF",
                           title    => $title
                         );
    print "Converting $xmlfile to $pdffile \n";

    # pre-process the images. Image handles are stored per 'src' value and
    # belong to the current $pdf, so entries from a previous file are dropped.
    %IMAGES = ();
    foreach my $img ($xml->findnodes('//img'))
    {
        my $src = $img->getAttribute('src');
        unless (defined $src && length $src)
        {
            warn "xml2pdf: 'img' element without 'src' in $xmlfile, skipping\n";
            next;
        }
        next if exists $IMAGES{$src};       # same image referenced again

        # Guess the type from the end of the name; gif is the default.
        my $filetype = $src =~ /jpe?g$/i ? 'jpeg'
                     : $src =~ /png$/i   ? 'png'
                     :                     'gif';

        my $filename = $src;
        if ($src =~ m{^https?://}i)
        {
            # Store the download under the last path component of the URL,
            # which must look like "name.ext".
            my ($localname) = $src =~ m{/([^/]*\.[^/]*)$};
            unless (defined $localname)
            {
                warn "xml2pdf: cannot derive a file name from $src, skipping\n";
                next;
            }
            print $localname."\n";

            my $status = getstore($src, $localname);
            unless (is_success($status))
            {
                warn "xml2pdf: download of $src failed (status $status), skipping\n";
                next;
            }
            $filename = $localname;
        }

        $IMAGES{$src} = $pdf->load_image(filetype => $filetype,
                                         filename => $filename );
    }

    # Process the document
    foreach my $section ($xml->findnodes('document/section1'))
    {
        #start every section on a new page
        $pdf->start_page();

        # Calculate the starting Y-axis value
        $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # Print the header
        my $heading = $section->getAttribute('title');
        $heading = '' unless defined $heading;
        printpdf($pdf, $FONTDEFINITION{section1},
                 "\n".$heading."\n\n", $MAXYPOS);

        processsection($pdf, $section);
    }
}

# printpdf($pdf, $font, $what, $ypos)
#
# Prints the text $what in a box that spans the page width between the left
# and right margins, starting new pages for as long as print_boxed reports
# characters that were not printed.
#
#   $pdf  - the PDFLib object to print on
#   $font - array ref with the arguments for set_font (see %FONTDEFINITION)
#   $what - the text to print
#   $ypos - optional height of the first box; when omitted (or 0) the
#           current "texty" value of $pdf is used instead
sub printpdf
{
    my ($pdf, $font, $what, $ypos) = @_;
    $pdf->set_font( @{$font} );
    my $charsnotprinted = length($what);
    my $onfreshpage     = 0;

    while ($charsnotprinted)
    {
        my $before = $charsnotprinted;
        $charsnotprinted = $pdf->print_boxed($what,
                               mode => "left",
                               x    => $LEFTMARGIN,
                               y    => $BOTTOMMARGIN,
                               w    => $PAGEWIDTHPOINTS - $RIGHTMARGIN -
                                       $LEFTMARGIN,
                               h    => $ypos || $pdf->get_value("texty")
                           ) || 0;

        # Nothing was printed even though the box started on a brand new
        # page: retrying would only produce empty pages forever.
        if ($onfreshpage && $charsnotprinted >= $before)
        {
            warn "xml2pdf: $charsnotprinted characters do not fit on a page, dropped\n";
            last;
        }

        # Keep only the tail that still has to be printed.
        $what = substr($what, length($what) - $charsnotprinted);
        if ($charsnotprinted)
        {
            $pdf->start_page();
            $onfreshpage = 1;
        }
        # Every box after the first one gets the full usable page height.
        $ypos = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;
    }
}

# processsection($pdf, $section)
#
# Walks the element children of $section and renders them on $pdf:
#   section2 - prints its 'title' attribute, then recurses into the element
#   p        - prints its text, then recurses so nested 'img' children are
#              placed after the text
#   img      - places the image that was pre-loaded into %IMAGES under the
#              element's 'src' value, starting a new page first when the
#              image is taller than the current "texty" position
#
#   $pdf     - the PDFLib object to draw on
#   $section - the XML::XPath element node whose children are processed
sub processsection
{
    my ($pdf, $section) = @_;

    foreach my $node ($section->findnodes('*'))
    {
        my $name = $node->getName();

        if ($name eq "section2")
        {
            my $title = $node->getAttribute('title');
            $title = '' unless defined $title;
            printpdf($pdf, $FONTDEFINITION{$name},
                     "\n".$title."\n");
            processsection($pdf, $node);       #recurse!
        }
        elsif ($name eq "p")
        {
            printpdf($pdf, $FONTDEFINITION{$name},
                     "\n".$node->string_value()."\n");
            processsection($pdf, $node);       #recurse!
        }
        elsif ($name eq "img")
        {
            my $src   = $node->getAttribute('src');
            my $image = defined $src ? $IMAGES{$src} : undef;

            # An image that was never loaded cannot be placed; skip it
            # instead of dying on a method call on an undefined value.
            unless ($image)
            {
                warn "xml2pdf: image '"
                   . (defined $src ? $src : '')
                   . "' was not loaded, skipping\n";
                next;
            }

            my $height = $image->height();

            # Not enough room left below the text position: continue at the
            # top of a new page.
            if ($pdf->get_value('texty') - $height < 0)
            {
                $pdf->start_page();
                $pdf->set_text_pos($pdf->get_value('textx'),
                                   $PAGEHEIGHTPOINTS - $TOPMARGIN);
            }

            $pdf->add_image(img => $image,
                            x   => $LEFTMARGIN,
                            y   => $pdf->get_value('texty') - $height);

            # Move the text position below the image that was just placed.
            $pdf->set_text_pos($pdf->get_value('textx'),
                               $pdf->get_value('texty') - $height);
        }
    }
}
Back to
Craft