Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl Monk, Perl Meditation
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??
    0: #!/usr/bin/perl -w
    1: ##############################################################################
    2: #
    3: # XML2PDF
    4: #
    5: # Author : HolyGrail
    6: # Version: 0.7
    7: #
    8: # Usage: xml2pdf {<xmlfile>}
    9: #
    10: # This conversion program translates XML into PDF. You can specify more than
    11: # one XML file on the command line and they will all be processed. The PDF file
    12: # name replaces ".xml" with ".pdf", or adds ".pdf" if the source file name does
    13: # not end with ".xml". It does no XML validation at all. The XML file should
    14: # conform to the following DTD:
    15: #
    16: # <!ELEMENT document (section1)+>
    17: # <!ATTLIST document
    18: #     title CDATA #REQUIRED
    19: # >
    20: # 
    21: # <!ELEMENT section1 (section2 | p | img)+>
    22: # <!ATTLIST section1
    23: #     title CDATA #REQUIRED
    24: # >
    25: #
    26: # <!ELEMENT section2 (p | img)+>
    27: # <!ATTLIST section2
    28: #     title CDATA #REQUIRED
    29: # >
    30: #
    31: # <!ELEMENT p (#PCDATA | img)*>
    32: #
    33: # <!ELEMENT img EMPTY>
    34: # <!ATTLIST img
    35: #     src CDATA #REQUIRED
    36: # >
    37: # 
    38: # Known issues:
    39: # * 'img'-elements are processed after a complete 'p' element is processed, 
    40: #   so the images that are meant to come 'in' the text, come 'after' the 
    41: #   text. The workaround for this should be to split the text up in multiple 
    42: #   p-elements and put the img-element between two p-elements.
    43: # * Images that are wider than the page fall off the page on the right side
    44: #
    45: # Bugs:
    46: # * Lots of them, no doubt
    47: #
    48: # Changes:
    49: #
    50: # 0.7: The 'src' of an 'img' tag can now also be a URL, not just a local file
    51: # 0.6: It now also accepts images (JPG, GIF and PNG), changed the DTD 
    52: #      accordingly.
    53: # 0.5: Changed the program call, so you can process multiple files in one call
    54: # 0.4: It now can handle sections that don't fit on one page. It starts new 
    55: #      pages if needed.
    56: # 0.3: Adapted it to conform to OeufMayo's DTD
    57: # 0.2: Minor change (0.1 re-declared the global vars as 'my'), thanks to davorg
    58: # 0.1: First release
    59: #
    60: ##############################################################################
    61: use strict;
    62: use PDFLib;
    63: use XML::XPath;
    64: use LWP::Simple;
    65: use vars qw ( $TOPMARGIN 
    66:               $BOTTOMMARGIN 
    67:               $LEFTMARGIN 
    68:               $RIGHTMARGIN 
    69:               $PAGEHEIGHTPOINTS 
    70:               $PAGEWIDTHPOINTS
    71: 	      $MAXYPOS
    72: 	      %FONTDEFINITION
    73: 	      %IMAGES
    74:              );
    # Define the fonts for each element. Each value is the face/size argument
    # list that printpdf() hands to $pdf->set_font().
    %FONTDEFINITION = ( section1  => [face => "Helvetica-Bold", size => "16.0"],
                        section2  => [face => "Helvetica-Bold", size => "14.0"],
                        p         => [face => "Helvetica", size => "12.0"]
                        );

    # Pagesetup
    my $pagename      = "a4";
    $PAGEHEIGHTPOINTS = 842; # See PDFLib's documentation for 
    $PAGEWIDTHPOINTS  = 595; # the size of your page
    ($TOPMARGIN, $BOTTOMMARGIN, $LEFTMARGIN, $RIGHTMARGIN) = (10, 10, 15, 15);

    # Convert every file named on the command line into its own PDF.
    foreach my $xmlfile (@ARGV)
    {
      # Documentsetup
      my $xml = XML::XPath->new(filename => $xmlfile);

      # Work on a copy of the name so that @ARGV itself is left untouched:
      # ".xml" becomes ".pdf", any other name simply gets ".pdf" appended.
      (my $pdffile = $xmlfile) =~ s/\.xml$/.pdf/i;
      $pdffile .= ".pdf" unless $pdffile =~ /\.pdf$/i;

      # Without a <document> root there is nothing to convert; skip this file
      # instead of dying on an undefined node so the remaining files still run.
      my ($document) = $xml->findnodes('document');
      unless ($document)
      {
        warn "Skipping $xmlfile: no 'document' root element found\n";
        next;
      }

      # The DTD declares the attribute as 'title' (not 'name'). Fall back to
      # the file name when the attribute is missing.
      my $title = $document->getAttribute('title');
      $title = $xmlfile unless defined $title;

      my $pdf = PDFLib->new( filename => $pdffile,
                             papersize=> $pagename,
                             creator  => "XML2PDF",
                             title    => $title
                            );
      print "Converting $xmlfile to $pdffile \n";

      # pre-process the images:
      # Image handles belong to the PDF they were loaded into, so start every
      # document with an empty cache.
      %IMAGES = ();
      foreach my $img ($xml->findnodes('//img'))
      {
        my $src = $img->getAttribute('src');
        unless (defined $src && length $src)
        {
          warn "Skipping 'img' element without a 'src' attribute in $xmlfile\n";
          next;
        }
        next if exists $IMAGES{$src};   # already loaded for this document

        # Guess the image type from the end of the name; anything that is not
        # recognised is treated as GIF, as before.
        my $filetype = 'gif';
        if    ($src =~ /(?:jpg|jpeg)$/i) { $filetype = 'jpeg'; }
        elsif ($src =~ /png$/i)          { $filetype = 'png';  }

        my $filename = $src;

        if ($src =~ m{^https?://}i)
        {
          # Store the download in the current directory under the last path
          # segment of the URL. The segment must contain a dot and can never
          # contain a '/', so the download cannot land in another directory.
          my ($localname) = $src =~ m{/([^/]*\.[^/]*)$};
          unless (defined $localname)
          {
            warn "Skipping image $src: cannot derive a local file name\n";
            next;
          }
          print $localname."\n";

          # getstore() returns the HTTP status code of the transfer.
          my $status = getstore($src, $localname);
          unless (is_success($status))
          {
            warn "Skipping image $src: download failed (HTTP status $status)\n";
            next;
          }
          $filename = $localname;
        }

        $IMAGES{$src} =
           $pdf->load_image(filetype => $filetype,
                            filename => $filename );
      }

      # Process the document
      foreach my $section ($xml->findnodes('document/section1'))
      {
        #start every section on a new page
        $pdf->start_page();

        # Calculate the starting Y-axis value
        $MAXYPOS = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # Print the header
        printpdf($pdf, $FONTDEFINITION{section1},
                 "\n".$section->getAttribute('title')."\n\n", $MAXYPOS);

        processsection($pdf, $section);
      }
    }
    139: 
    # printpdf($pdf, $font, $what, $ypos)
    #
    # Prints the text $what in the font $font, continuing on new pages until
    # all of it has been placed.
    #
    #   $pdf   object providing set_font, print_boxed, get_value and start_page
    #   $font  array ref with the argument list for $pdf->set_font()
    #   $what  the text to print
    #   $ypos  optional height of the first text box; when false, the current
    #          "texty" value of $pdf is used instead
    #
    # The return value of print_boxed() is treated as the number of characters
    # that did not fit into the box. Returns nothing.
    sub printpdf
    {
        my ($pdf, $font, $what, $ypos) = @_;
        $pdf->set_font( @{$font} );

        # Box geometry that does not change between pages.
        my $boxwidth   = $PAGEWIDTHPOINTS - $RIGHTMARGIN - $LEFTMARGIN;
        my $fullheight = $PAGEHEIGHTPOINTS - $TOPMARGIN - $BOTTOMMARGIN;

        # True once this call has started a page of its own.
        my $onfreshpage = 0;

        while (length $what)
        {
          my $charsnotprinted = $pdf->print_boxed($what,
                                  mode => "left",
                                  x    => $LEFTMARGIN,
                                  y    => $BOTTOMMARGIN,
                                  w    => $boxwidth,
                                  h    => $ypos || $pdf->get_value("texty")
                                );
          last unless $charsnotprinted;

          # If nothing fits even on a page we just started, another page will
          # not help either. Stop here instead of adding pages forever.
          if ($onfreshpage && $charsnotprinted >= length($what))
          {
            warn "printpdf: text does not fit on an empty page, ",
                 "$charsnotprinted character(s) dropped\n";
            last;
          }

          # Keep only the tail that is still waiting to be printed and
          # continue with a full-height box on the next page.
          $what = substr($what, length($what) - $charsnotprinted);
          $pdf->start_page();
          $onfreshpage = 1;
          $ypos = $fullheight;
        }
        return;
    }
    161: 
    # processsection($pdf, $section)
    #
    # Renders the child elements of $section into $pdf, in document order:
    #
    #   section2  prints the 'title' attribute, then recurses into the element
    #   p         prints the text of the paragraph, then recurses into it so
    #             that 'img' children are placed after the text
    #   img       places the image preloaded into %IMAGES under its 'src'
    #             value at the current text position, starting a new page
    #             first when it would not fit above the bottom margin
    #
    # Elements with any other name are ignored. Returns nothing.
    sub processsection
    {
      my ($pdf, $section) = @_;
      foreach my $node ($section->findnodes('*'))
      {
        my $name = $node->getName();

        if ($name eq "section2")
        {
          my $title = $node->getAttribute('title');
          $title = '' unless defined $title;
          printpdf($pdf, $FONTDEFINITION{$name},
                   "\n".$title."\n");
          processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "p")
        {
          printpdf($pdf, $FONTDEFINITION{$name},
                   "\n".$node->string_value()."\n");
          processsection($pdf, $node); #recurse!
        }
        elsif ($name eq "img")
        {
          my $src   = $node->getAttribute('src');
          my $image = defined $src ? $IMAGES{$src} : undef;

          # An image that was never loaded cannot be measured or placed.
          unless ($image)
          {
            warn "Skipping image '", (defined $src ? $src : ''),
                 "': it was not loaded\n";
            next;
          }

          my $height = $image->height();

          # Keep the image above the bottom margin, the same lower bound
          # that printpdf() uses for its text boxes.
          if ($pdf->get_value('texty') - $height < $BOTTOMMARGIN)
          {
            $pdf->start_page();
            $pdf->set_text_pos($pdf->get_value('textx'), $PAGEHEIGHTPOINTS - $TOPMARGIN);
          }

          $pdf->add_image(img => $image,
                          x   => $LEFTMARGIN,
                          y   => $pdf->get_value('texty') - $height);

          # Move the text position below the image just placed.
          $pdf->set_text_pos($pdf->get_value('textx'),
                             $pdf->get_value('texty') - $height);
        }
      }
      return;
    }
    

In reply to XML2PDF by holygrail

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others romping around the Monastery: (5)
As of 2024-04-23 06:35 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found