comment on

#!/usr/bin/perl -w
##############################################################################
#
# XML2PDF
#
# Author : HolyGrail
# Version: 0.7
#
# Usage: xml2pdf {<xmlfile>}
#
# This conversion program translates XML into PDF. You can specify more than
# one XML file on the command line and they will all be processed. The name of
# each PDF file is the name of the XML file with ".xml" replaced by ".pdf", or
# with ".pdf" added if the source file does not end with ".xml". It does no
# XML validation at all. The XML file should conform to the following DTD:
#
# <!ELEMENT document (section1)+>
# <!ATTLIST document
#     title CDATA #REQUIRED
# >
#
# <!ELEMENT section1 (section2 | p | img)+>
# <!ATTLIST section1
#     title CDATA #REQUIRED
# >
#
# <!ELEMENT section2 (p | img)+>
# <!ATTLIST section2
#     title CDATA #REQUIRED
# >
#
# <!ELEMENT p (#PCDATA | img)*>
#
# <!ELEMENT img EMPTY>
# <!ATTLIST img
#     src CDATA #REQUIRED
# >
#
# Known issues:
# * 'img' elements are processed after a complete 'p' element is processed,
#   so images that are meant to appear 'in' the text appear 'after' the
#   text. The workaround is to split the text into multiple 'p' elements and
#   put the 'img' element between two 'p' elements.
# * Images that are wider than the page fall off the page on the right side.
#
# Bugs:
# * Lots of them, no doubt
#
# Changes:
#
# 0.7: The 'src' of an 'img' tag can now also be a URL and no longer just a
#      local file
# 0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD
#      accordingly.
# 0.5: Changed the program call, so you can process multiple files in one call
# 0.4: It can now handle sections that don't fit on one page. It starts new
#      pages if needed.
# 0.3: Adapted it to conform to OeufMayo's DTD
# 0.2: Minor change (0.1 re-declared the global vars as 'my', thanks to davorg)
# 0.1: First release
#
##############################################################################
use strict;
use warnings;
use PDFLib;
use XML::XPath;
use LWP::Simple;
# Package globals shared by the main loop and the subs below.
# Declared with 'our' (the obsolete 'use vars' pragma did the same job).
our ( $TOPMARGIN,
      $BOTTOMMARGIN,
      $LEFTMARGIN,
      $RIGHTMARGIN,
      $PAGEHEIGHTPOINTS,
      $PAGEWIDTHPOINTS,
      $MAXYPOS,          # set by the main loop for every section1
      %FONTDEFINITION,   # element name => argument list for set_font()
      %IMAGES            # img 'src' attribute => loaded image handle
    );

# Define the fonts for each element. Every value is an array reference that
# printpdf() flattens into the argument list of $pdf->set_font().
%FONTDEFINITION = ( section1 => [face => "Helvetica-Bold", size => "16.0"],
                    section2 => [face => "Helvetica-Bold", size => "14.0"],
                    p        => [face => "Helvetica",      size => "12.0"],
                  );

# Page setup
my $pagename      = "a4"; # handed to PDFLib->new() as the paper size
$PAGEHEIGHTPOINTS = 842;  # See PDFLib's documentation for
$PAGEWIDTHPOINTS  = 595;  # the size of your page
($TOPMARGIN, $BOTTOMMARGIN, $LEFTMARGIN, $RIGHTMARGIN) = (10, 10, 15, 15);

# Main loop: convert every XML file named on the command line into a PDF.
# For each file it loads all images first, then renders each 'section1'
# element on a page of its own.
for my $xmlfile (@ARGV)
{
  # Document setup
  my $xml     = XML::XPath->new(filename => $xmlfile);
  my $pdffile = _pdf_name_for($xmlfile);

  # Fail with a readable message instead of calling a method on undef when
  # the file has no 'document' root element.
  my ($document) = $xml->findnodes('document');
  die "$xmlfile: no <document> root element found\n" unless $document;

  # The DTD calls this attribute 'title' (it used to be read as 'name').
  # The scalar assignment keeps a missing attribute from shifting the
  # key/value pairs passed to PDFLib->new().
  my $title = $document->getAttribute('title');
  $title = '' unless defined $title;

  my $pdf = PDFLib->new( filename  => $pdffile,
                         papersize => $pagename,
                         creator   => "XML2PDF",
                         title     => $title,
                       );
  print "Converting $xmlfile to $pdffile \n";

  # Pre-process the images: fetch remote ones, then load every image so
  # processsection() can find it in %IMAGES by its 'src' attribute.
  foreach my $img ($xml->findnodes('//img'))
  {
    my $src      = $img->getAttribute('src');
    my $filename = $src;

    if ($src =~ m{\Ahttps?://}i)
    {
      # Store the download in the current directory under the last path
      # component of the URL.
      $filename = _local_name_for_url($src);
      die "$xmlfile: cannot derive a local file name from '$src'\n"
        unless defined $filename;
      print "$filename\n";

      my $status = getstore($src, $filename);
      die "$xmlfile: download of '$src' failed (HTTP status $status)\n"
        unless is_success($status);
    }

    $IMAGES{$src} = $pdf->load_image(filetype => _image_type_for($src),
                                     filename => $filename);
  }

  # Process the document
  foreach my $section ($xml->findnodes('document/section1'))
  {
    # Start every section on a new page
    $pdf->start_page();

    # Calculate the starting Y-axis value
    $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

    # Print the header
    printpdf($pdf, $FONTDEFINITION{section1},
             "\n".$section->getAttribute('title')."\n\n", $MAXYPOS);

    processsection($pdf, $section);
  }
}

# Derive the PDF file name from the XML file name: a trailing ".xml" (any
# case) becomes ".pdf", every other name gets ".pdf" appended. A source file
# that itself ends in ".pdf" therefore is not overwritten by its own output.
sub _pdf_name_for
{
  my ($xmlfile) = @_;
  my $pdffile = $xmlfile;
  $pdffile .= '.pdf' unless $pdffile =~ s/\.xml\z/.pdf/i;
  return $pdffile;
}

# Guess the image type for load_image() from the end of the 'src' value.
# Anything that is not recognisably JPEG or PNG is treated as GIF, which was
# already the fallback.
sub _image_type_for
{
  my ($src) = @_;
  return 'jpeg' if $src =~ /jpe?g\z/i;
  return 'png'  if $src =~ /png\z/i;
  return 'gif';
}

# Return the last path component of a URL when it looks like a file name
# (contains a dot), or undef when the URL has no such component.
sub _local_name_for_url
{
  my ($url) = @_;
  my ($name) = $url =~ m{\A \w+ :// [^/]+ / (?: .* / )? ( [^/]+ \. [^/]+ ) \z}x;
  return $name;
}

# printpdf($pdf, $font, $what, $ypos)
#
# Prints the text $what in a left-aligned box that spans the page between the
# left and right margins, starting new pages until all of it is printed.
#
#   $pdf  - object providing set_font, print_boxed, get_value and start_page
#   $font - array reference whose elements are passed to $pdf->set_font()
#   $what - the text to print
#   $ypos - optional height of the box for the first chunk; when it is false
#           the current "texty" value of $pdf is used instead
#
# Every chunk after a page break uses the full height between the top and
# bottom margins. If print_boxed() prints nothing even on such a fresh page,
# the rest of the text is dropped with a warning instead of looping forever.
sub printpdf
{
  my ($pdf, $font, $what, $ypos) = @_;
  $pdf->set_font( @{$font} );

  my $fullheight = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;
  my $remaining  = length($what);
  my $freshpage  = 0;

  while ($remaining)
  {
    my $before = $remaining;

    # print_boxed() reports how many characters did not fit in the box
    $remaining = $pdf->print_boxed($what,
                     mode => "left",
                     x    => $LEFTMARGIN,
                     y    => $BOTTOMMARGIN,
                     w    => $PAGEWIDTHPOINTS - $RIGHTMARGIN - $LEFTMARGIN,
                     h    => $ypos || $pdf->get_value("texty")
                   );
    last unless $remaining;

    if ($freshpage && $remaining >= $before)
    {
      warn "printpdf: text does not fit on an empty page, "
         . "dropping $remaining characters\n";
      last;
    }

    # Keep only the part that still has to be printed and continue on a
    # new page with the full text height available.
    $what = substr($what, length($what) - $remaining);
    $pdf->start_page();
    $ypos      = $fullheight;
    $freshpage = 1;
  }
}

# processsection($pdf, $section)
#
# Renders the child elements of $section in document order:
#
#   section2 - prints its 'title' attribute, then recurses into it
#   p        - prints its text, then recurses so that 'img' children are
#              placed after the paragraph text
#   img      - places the image that the main loop stored in %IMAGES under
#              the element's 'src' attribute at the current text position
#              and moves the text position below it
#
# An image that would reach into the bottom margin is moved to a new page.
# An 'img' without an entry in %IMAGES is skipped with a warning.
sub processsection
{
  my ($pdf, $section) = @_;

  foreach my $node ($section->findnodes('*'))
  {
    my $name = $node->getName();

    if ($name eq "section2")
    {
      printpdf($pdf, $FONTDEFINITION{$name},
               "\n".$node->getAttribute('title')."\n");
      processsection($pdf, $node); #recurse!
    }
    elsif ($name eq "p")
    {
      printpdf($pdf, $FONTDEFINITION{$name},
               "\n".$node->string_value()."\n");
      processsection($pdf, $node); #recurse!
    }
    elsif ($name eq "img")
    {
      my $src   = $node->getAttribute('src');
      my $image = defined $src ? $IMAGES{$src} : undef;
      unless ($image)
      {
        warn "processsection: no image loaded for '"
           . (defined $src ? $src : '') . "', skipping it\n";
        next;
      }
      my $height = $image->height();

      # Not enough room above the bottom margin: continue at the top of a
      # new page.
      if ($pdf->get_value('texty') - $height < $BOTTOMMARGIN)
      {
        $pdf->start_page();
        $pdf->set_text_pos($pdf->get_value('textx'),
                           $PAGEHEIGHTPOINTS - $TOPMARGIN);
      }

      $pdf->add_image(img => $image,
                      x   => $LEFTMARGIN,
                      y   => $pdf->get_value('texty') - $height);

      # Following text starts below the image
      $pdf->set_text_pos($pdf->get_value('textx'),
                         $pdf->get_value('texty') - $height);
    }
  }
}

In reply to XML2PDF by holygrail

Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.


Perl Monk, Perl Meditation
	PerlMonks