use 5.010;
use strict;
use warnings;

use Path::Tiny;

# Usage: <script> <pdf-file>
#
# Reads an uncompressed PDF file, reports how many COS objects its /Size
# entry announces, and prints the byte offsets at which each "N 0 obj"
# header is found.
die "Usage: $0 <pdf-file>\n" unless @ARGV;

# Raw bytes of the input file; shared by the subroutines below. Read with
# slurp_raw so that the reported offsets are byte positions.
my $data = path($ARGV[0])->slurp_raw;

my $outcome = get_nr_of_cos_objects();
find_obj_blocks($outcome);

# Returns the number of COS objects announced by an uncompressed PDF file.
#
# Looks for the first "/Size <number>" entry in the file data. 1 is
# subtracted from that number because the first item in the XREF table
# refers to a non-obj block (root).
#
# Prints "Number of COS: <n>." to STDOUT and returns <n>. When no /Size
# entry with a number is found, a warning is emitted on STDERR and the
# result is -1.
sub get_nr_of_cos_objects {
    my $result = -1;

    # Capture the digits directly and use $1 only inside the branch where
    # the match is known to have succeeded, so a stale $1 is never read.
    if ($data =~ m{/Size\s+(\d+)}) {
        $result = $1 - 1;
    }
    else {
        warn "No /Size entry found in input.\n";
    }

    say "Number of COS: " . $result . ".";
    return $result;
}

# Searches for occurrences of patterns like "x 0 obj" at the start of a
# line, where x is a number from 1 to the number of objects passed as the
# first parameter. For each header found, the start and end offsets of the
# matched header text are printed.
#
# NOTE(review): the "End of Block" offset is the end of the "x 0 obj"
# header match, not the position of the matching "endobj" - confirm that
# this is what is intended.
sub find_obj_blocks {
    my ($object_count) = @_;

    for my $object_number (1 .. $object_count) {

        # (?<![^\r\n]) accepts start of data or a preceding CR or LF, so
        # files with CR-only line endings are handled as well.
        next unless $data =~ m/(?<![^\r\n])\Q$object_number\E 0 obj/;

        # Copy the offsets right away; @- and @+ are overwritten by the
        # next successful match.
        my ($begin_pos, $end_pos) = ($-[0], $+[0]);

        printf("Begin of Block [%5d] found at position [%d]\n",
            $object_number, $begin_pos);
        printf("End of Block [%5d] found at position [%d]\n\n",
            $object_number, $end_pos);
    }

    return;
}