0: #!/usr/bin/perl -w
1: ##############################################################################
2: #
3: # XML2PDF
4: #
5: # Author : HolyGrail
6: # Version: 0.7
7: #
8: # Usage: xml2pdf {<xmlfile>}
9: #
# This conversion program translates XML into PDF. You can specify more than one
# XML file on the command line and they will all be processed. Each PDF file is
# named after its source file, with ".xml" replaced by ".pdf", or with ".pdf"
# appended if the source file does not end with ".xml". No XML validation is
# done at all. The XML file should conform to the following DTD:
15: #
16: # <!ELEMENT document (section1)+>
17: # <!ATTLIST document
18: # title CDATA #REQUIRED
19: # >
20: #
21: # <!ELEMENT section1 (section2 | p | img)+>
22: # <!ATTLIST section1
23: # title CDATA #REQUIRED
24: # >
25: #
26: # <!ELEMENT section2 (p | img)+>
27: # <!ATTLIST section2
28: # title CDATA #REQUIRED
29: # >
30: #
# <!ELEMENT p (#PCDATA | img)*>
#
# <!ELEMENT img EMPTY>
34: # <!ATTLIST img
35: # src CDATA #REQUIRED
36: # >
37: #
38: # Known issues:
39: # * 'img'-elements are processed after a complete 'p' element is processed,
40: # so the images that are meant to come 'in' the text, come 'after' the
41: # text. The workaround for this should be to split the text up in multiple
42: # p-elements and put the img-element between two p-elements.
43: # * Images that are wider than the page fall off the page on the right side
44: #
45: # Bugs:
46: # * Lots of them, no doubt
47: #
48: # Changes:
49: #
# 0.7: The 'src' of an 'img' tag can now also be a URL, not just a local file
51: # 0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD
52: # accordingly.
53: # 0.5: Changed the program call, so you can process multiple files in one call
54: # 0.4: It now can handle sections that don't fit on one page. It starts new
55: # pages if needed.
56: # 0.3: Adapted it to conform to OeufMayo's DTD
# 0.2: Minor change (0.1 re-declared the global vars as 'my'; thanks to davorg)
58: # 0.1: First release
59: #
60: ##############################################################################
61: use strict;
62: use PDFLib;
63: use XML::XPath;
64: use LWP::Simple;
65: use vars qw ( $TOPMARGIN
66: $BOTTOMMARGIN
67: $LEFTMARGIN
68: $RIGHTMARGIN
69: $PAGEHEIGHTPOINTS
70: $PAGEWIDTHPOINTS
71: $MAXYPOS
72: %FONTDEFINITION
73: %IMAGES
74: );
# Font settings per element type. Each value is the argument list that is
# expanded into set_font() when text for that element is printed.
%FONTDEFINITION = (
    section1 => [ face => "Helvetica-Bold", size => "16.0" ],
    section2 => [ face => "Helvetica-Bold", size => "14.0" ],
    p        => [ face => "Helvetica",      size => "12.0" ],
);

# Page geometry. $pagename is handed to PDFLib as the paper size; the two
# point dimensions must describe that same paper size (see PDFLib's
# documentation for the size of your page).
my $pagename = "a4";
($PAGEHEIGHTPOINTS, $PAGEWIDTHPOINTS) = (842, 595);

# Margins, expressed in the same unit as the page dimensions above.
$TOPMARGIN    = 10;
$BOTTOMMARGIN = 10;
$LEFTMARGIN   = 15;
$RIGHTMARGIN  = 15;
86:
# Convert every XML file named on the command line into a PDF.
#
# Per input file: derive the output name, open the PDF, preload every <img>
# found anywhere in the document (remote ones are downloaded into the current
# directory first), then render each <section1> starting on a page of its own.
foreach my $xmlfile (@ARGV)
{
    # Documentsetup
    my $xml     = XML::XPath->new(filename => $xmlfile);
    my $pdffile = _pdf_name_for($xmlfile);

    # Without a root element there is nothing to render; skip the file
    # before any output is created for it.
    my ($document) = $xml->findnodes('document');
    unless ($document)
    {
        warn "Skipping $xmlfile: no 'document' root element found\n";
        next;
    }

    # The DTD declares the attribute as 'title'. 'name' is still honoured as
    # a fallback so documents written for the old lookup keep their title.
    my $title = $document->getAttribute('title');
    $title = $document->getAttribute('name') unless defined $title;

    my $pdf = PDFLib->new( filename  => $pdffile,
                           papersize => $pagename,
                           creator   => "XML2PDF",
                           title     => $title
                         );
    print "Converting $xmlfile to $pdffile \n";

    # pre-process the images:
    foreach my $img ($xml->findnodes('//img'))
    {
        my $src = $img->getAttribute('src');
        die "$xmlfile: 'img' element without a 'src' attribute\n"
            unless defined $src && length $src;

        my $filename = $src;
        if ($src =~ m{\Ahttps?://}i)
        {
            # NOTE: the download is stored under the URL's last path segment
            # in the current directory and overwrites a file of that name.
            $filename = _local_name_for_url($src);
            die "$xmlfile: cannot derive a file name from image URL $src\n"
                unless defined $filename;
            print $filename."\n";

            # getstore() returns the HTTP status code of the transfer
            my $status = getstore($src, $filename);
            die "$xmlfile: fetching $src failed (HTTP status $status)\n"
                unless is_success($status);
        }

        # Unrecognised extensions fall back to 'gif', as before.
        my $filetype = _image_filetype($src);
        $filetype = 'gif' unless defined $filetype;

        $IMAGES{$src} = $pdf->load_image(filetype => $filetype,
                                         filename => $filename);
    }

    # Process the document
    foreach my $section ($xml->findnodes('document/section1'))
    {
        #start every section on a new page
        $pdf->start_page();

        # Calculate the starting Y-axis value
        $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # Print the header
        my $heading = $section->getAttribute('title');
        $heading = '' unless defined $heading;
        printpdf($pdf, $FONTDEFINITION{section1},
                 "\n".$heading."\n\n", $MAXYPOS);

        processsection($pdf, $section);
    }
}

# Returns the output file name for an input file: a trailing ".xml" (any
# case) is replaced by ".pdf"; any other name gets ".pdf" appended, so the
# input file itself is never chosen as the output.
sub _pdf_name_for
{
    my ($xmlfile) = @_;
    my $pdffile = $xmlfile;
    $pdffile .= '.pdf' unless $pdffile =~ s/\.xml\z/.pdf/i;
    return $pdffile;
}

# Returns the image type ('jpeg', 'gif' or 'png') implied by the end of an
# image source string, or undef when the ending is not recognised.
sub _image_filetype
{
    my ($src) = @_;
    return 'jpeg' if $src =~ /jpe?g\z/i;
    return 'gif'  if $src =~ /gif\z/i;
    return 'png'  if $src =~ /png\z/i;
    return;
}

# Returns the last path segment of an http(s) URL (query string and fragment
# excluded) for use as a local file name, or undef when the URL has no usable
# segment (e.g. it ends in '/').
sub _local_name_for_url
{
    my ($url) = @_;
    my ($name) = $url =~ m{
        \A https?:// [^/?\#]+ /      # scheme and host
        (?: [^?\#]* / )?             # leading directories, if any
        ( [^/?\#]+ )                 # last path segment
        (?= [?\#] | \z )
    }xi;
    return unless defined $name;
    return if $name eq '.' || $name eq '..';
    return $name;
}
139:
# printpdf($pdf, $font, $what [, $ypos])
#
# Prints the text $what left-aligned between the left and right margins,
# starting a new page whenever the text does not fit in the remaining box.
#
#   $pdf   - PDF object; set_font, print_boxed, get_value and start_page
#            are called on it
#   $font  - array ref whose contents are passed to set_font()
#   $what  - the text to print
#   $ypos  - optional height of the first text box; when omitted (or 0) the
#            current "texty" value of $pdf is used instead
#
# print_boxed() is expected to return the number of characters it could not
# place. If two consecutive attempts place nothing at all (the second one on
# a fresh, full-height page) the text can never fit, so the rest is dropped
# with a warning instead of starting new pages forever.
sub printpdf
{
    my ($pdf, $font, $what, $ypos) = @_;
    $pdf->set_font( @{$font} );

    my $fullheight = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;
    my $pending    = length($what);
    my $stalled    = 0;

    while ($pending)
    {
        my $left = $pdf->print_boxed($what,
                       mode => "left",
                       x    => $LEFTMARGIN,
                       y    => $BOTTOMMARGIN,
                       w    => $PAGEWIDTHPOINTS - $RIGHTMARGIN -
                               $LEFTMARGIN,
                       h    => $ypos || $pdf->get_value("texty")
                   );

        if ($left >= $pending)
        {
            # Nothing was placed. Once may just mean the page was full;
            # twice in a row means it does not fit on an empty page either.
            if ($stalled)
            {
                warn "printpdf: text does not fit on an empty page; "
                   . "$left character(s) dropped\n";
                last;
            }
            $stalled = 1;
        }
        else
        {
            $stalled = 0;
        }

        # Keep only the tail that still has to be printed
        $what    = substr($what, length($what) - $left);
        $pending = $left;

        $pdf->start_page() if $pending;
        $ypos = $fullheight;   # every following box spans a whole page
    }
    return;
}
161:
# processsection($pdf, $section)
#
# Renders the child elements of $section, in document order, onto $pdf:
#
#   section2 - prints its 'title' attribute in the section2 font, then
#              recurses into the element
#   p        - prints its string value in the p font, then recurses into the
#              element so that 'img' children are placed after the text
#   img      - places the image preloaded in %IMAGES under the node's 'src',
#              at the left margin directly below the current text position,
#              and moves the text position below the image
#
# Other elements are ignored. An image that would reach into the bottom
# margin is moved to the top of a new page first.
sub processsection
{
    my ($pdf, $section) = @_;

    foreach my $node ($section->findnodes('*'))
    {
        my $name = $node->getName();

        if ($name eq "section2")
        {
            my $title = $node->getAttribute('title');
            $title = '' unless defined $title;
            printpdf($pdf, $FONTDEFINITION{$name}, "\n".$title."\n");
            processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "p")
        {
            printpdf($pdf, $FONTDEFINITION{$name},
                     "\n".$node->string_value()."\n");
            processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "img")
        {
            my $src   = $node->getAttribute('src');
            my $image = defined $src ? $IMAGES{$src} : undef;
            unless ($image)
            {
                warn "processsection: no preloaded image for 'img' src "
                   . (defined $src ? "'$src'" : "(missing)") . "; skipped\n";
                next;
            }

            my $height = $image->height();

            # Respect the bottom margin, like the text boxes do
            if ($pdf->get_value('texty') - $height < $BOTTOMMARGIN)
            {
                $pdf->start_page();
                $pdf->set_text_pos($pdf->get_value('textx'),
                                   $PAGEHEIGHTPOINTS - $TOPMARGIN);
            }

            # y is the lower edge of the image; text continues from there
            my $y = $pdf->get_value('texty') - $height;
            $pdf->add_image(img => $image,
                            x   => $LEFTMARGIN,
                            y   => $y);
            $pdf->set_text_pos($pdf->get_value('textx'), $y);
        }
    }
    return;
}
In reply to XML2PDF
by holygrail
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.