package Sol::Parser;
use strict;
use warnings;
use Log::Log4perl;
use Pod::Usage;
use Readonly;
Readonly my $LENGTH_OF_FLOAT => 8;
Readonly my $LENGTH_OF_INTEGER => 2;
Readonly my $LENGTH_OF_LONG => 4;
Readonly my $END_OF_OBJECT => "\x00\x00\x09";
my $conf = q(
log4perl.category.sol.parser = WARN, ScreenAppender
log4perl.appender.ScreenAppender = Log::Log4perl::Appender::
+Screen
log4perl.appender.ScreenAppender.stderr = 0
log4perl.appender.ScreenAppender.layout = PatternLayout
log4perl.appender.ScreenAppender.layout.ConversionPattern=[%p] %d %M
+:%L %m%n
);
Log::Log4perl::init( \$conf );
my $log = Log::Log4perl::->get_logger(q(sol.parser));
my $file = undef;
my $FH = undef;
my %datatype = (
0x0 => 'number',
0x1 => 'boolean',
0x2 => 'string',
0x3 => 'object',
0x5 => 'null',
0x6 => 'undefined',
0x8 => 'array',
0xa => 'raw-array',
0xb => 'object-date',
0xd => 'object-string-number-boolean-textformat',
0xf => 'object-xml',
0x10 => 'object-customclass',
);
# Return type, value in list context.
sub getTypeAndValue {
$log->logdie("expected to be called in LIST context") if !wantarray(
+);
# Read data type
my $value = undef;
my $type = getBytes(1);
my $type_as_txt = $datatype{$type};
if (!exists($datatype{$type})) {
$log->warn(qq{Missing datatype for '$type'!}) if $log->is_warn();
}
# Read element depending on type
if($type == 0) {
$value = getFloat();
} elsif($type == 1){
$value = getBytes(1);
} elsif ($type == 2) {
$value = getString();
} elsif($type == 3){
$value = getObject();
} elsif($type == 5) { # null
$value = undef;
} elsif($type == 6) { # undef
$value = undef;
} elsif($type == 8){ # array
$value = getArray();
} elsif($type == 0xb){ # date
$log->logdie("Not implemented yet: date");
} elsif($type == 0xf){ # doublestring
$log->logdie("Not implemented yet: doublestring");
} elsif($type == 0x10){ # customclass
$value = getObject(1);
} else {
$log->logdie("Unknown type:$type" );
}
return ($type_as_txt, $value);
}
# Return object - if customClass argument is given then read two
# strings instead of one.
sub getObject {
my $customClass = shift;
my @retvals = ();
while (eof($FH) != 1) {
# Read until end flag is detected : 00 00 09
if (getraw(3) eq $END_OF_OBJECT) {
return join(q{,}, @retvals);
}
# "un-read" the 3 bytes
seek($FH, -3, 1) or $log->logdie("seek failed");
# Read name
my $name = getString();
$log->debug(qq{name:$name}) if $log->is_debug();
# Read 2nd name if customClass is set
if ($customClass) {
push @retvals, q{class_name=} . $name . q{;};
my $name = getString();
$log->debug(qq{name:$name (2nd name - customClass)}) if $log->is
+_debug();
$customClass = 0;
}
# Get data type and value
my ($type, $value) = getTypeAndValue();
$log->debug(qq{type:$type value:$value}) if $log->is_debug();
push @retvals, $name . q{;} . $value;
}
$log->logdie("Syntax error: reached end-of-file before end-of-object
+");
}
# Return array (list)
sub getArray {
my @retvals = ();
my $count = getlong();
if($count == 0) {
return getObject();
}
ELEMENT:
while ($count-- > 0) {
my $name = getString();
if (!defined($name)) {
last ELEMENT;
}
my $retval = undef;
my ($type, $value) = getTypeAndValue();
{
no warnings q{uninitialized}; # allow undef values
$log->debug(qq{$name;$type;$value}) if $log->is_debug();
$retval = qq{$name;$type;$value};
}
push @retvals, $retval;
}
# Now expect END_OF_OBJECT tag to be next
if (getraw(3) eq $END_OF_OBJECT) {
return join(q{,}, @retvals);
}
$log->error(q{Did not find expected END_OF_OBJECT! at end of array!}
+) if $log->is_error();
return;
}
sub getraw {
my $len = shift;
$log->logdie("missing length argument") unless $len;
my $buffer = undef;
my $num = read($FH, $buffer, $len);
return $buffer;
}
# read given number of bytes, default 1;
sub getBytes {
my $len = shift || 1;
my $buffer = undef;
my $num = read($FH, $buffer, $len);
return unpack("c*", $buffer);
}
# Read string: first 2 bytes length, then string itself. operates on
# global filehandle FH. Read length first unless length is given,
# otherwise read the given number of bytes.
sub getString {
my $len = shift;
my $buffer = undef;
my $num = undef;
# read length from filehandle unless set
$len = join(q{}, getBytes(2)) unless ($len);
# return undef if length is zero
return unless $len;
$log->debug(qq{len:$len}) if $log->is_debug();
$num = read($FH, $buffer, $len);
$log->debug(qq{buffer:$buffer}) if $log->is_debug();
return $buffer;
}
# read integer number, default 2 bytes
sub getint {
my $len = shift || $LENGTH_OF_INTEGER;
my $buffer = undef;
my $num = read($FH, $buffer, $len);
return unpack 'c*', reverse $buffer;
}
# read long integer number, default 4 bytes
sub getlong {
my $len = shift || $LENGTH_OF_LONG;
my $buffer = undef;
my $num = read($FH, $buffer, $len);
return unpack 'c*', reverse $buffer;
}
# read floating point number: default 8 bytes
sub getFloat {
my $len = shift || $LENGTH_OF_FLOAT;
my $buffer = undef;
my $num = read($FH, $buffer, $len);
return unpack 'd*', reverse $buffer;
}
# Read file header - 16 bytes in total. Return name if file starts
# with sol header, otherwise undef. Failure means the 'TCSO' tag is
# missing.
sub readHeader {
# skip first 6 bytes
getString(6);
# next 4 bytes should contain 'TSCO' tag
if (getString(4) ne q{TCSO}) {
$log->error("missing TCSO - not a sol file") if $log->is_error();
return; # failure
}
# Skip next 7 bytes
getString(7);
# Read next byte (length of name) + the name
my $name = getString(getint(1));
$log->debug("name:$name") if $log->is_debug();
# Skip next 4 bytes
getString(4);
return $name; # ok
}
# read an element, return "name;datatype;value"
sub readElement {
my $retval = undef;
# Read element length and name
my $name = getString(getint(2));
# Read data type and value
my ($type, $value) = getTypeAndValue();
{
no warnings q{uninitialized}; # allow undef values
$log->info(qq{$name;$type;$value}) if $log->is_info();
$retval = qq{$name;$type;$value};
}
# Read trailer (single byte)
my $trailer = getBytes(1);
if ($trailer != 0) {
$log->warn(qq{Expected 00 trailer, got '$trailer'}) if $log->is_wa
+rn();
}
return $retval;
}
# ------ parse file ------
sub parse {
my $file = shift;
$log->logdie( q{Missing argument file.}) if (!$file);
$log->logdie(qq{No such file '$file'}) if (! -f $file);
$log->debug("start") if $log->is_debug();
open($FH,"< $file") || $log->logdie("Error opening file $file");
$log->debug(qq{file:$file}) if $log->is_debug();
binmode($FH);
my @retvals = ();
# Read header
my $name = readHeader() or $log->logdie("Invalid sol header");
push @retvals, $name;
# Read data elements
while (eof($FH) != 1) {
push @retvals, readElement();
}
close($FH) or $log->logdie(q{failed to close filehandle!});
return @retvals;
}
1;
__END__
=pod
=head1 NAME
Sol::Parser - a .sol file reader
=head1 SYNOPSIS
use Sol::Parser;
my @content = Sol::Parser::parse("settings.sol");
print join("\n", @content);
=head1 DESCRIPTION
Local Shared Object (LSO), sometimes known as flash cookies, is a
cookie-like data entity used by Adobe Flash Player. LSOs are stored
as files on the local file system with the I<.sol> extension. This
module reads a Local Shared Object file and return content as a list.
=head1 SOL DATA FORMAT
The SOL files use a binary encoding. It consists of a header and any
number of elements. Both header and the elements have variable length
+s.
=head2 Header
The header has the following structure:
=over
=item * 6 bytes (discarded)
=item * 4 bytes that should contain the string 'TSCO'
=item * 7 bytes (discarded)
=item * 1 byte that signifies the length of name (X bytes)
=item * X bytes name
=item * 4 bytes (discarded)
=back
=head2 Element
Each element has the following structure:
=over
=item * 2 bytes length of element name (Y bytes)
=item * Y bytes element name
=item * 1 byte data type
=item * Z bytes data (depending on the data type)
=item * 1 byte trailer
=back
=head1 TODO
=head2 Support I<XML> output
=head2 Add support for datatypes I<date> and I<doublestring>.
=head1 SEE ALSO
=head2 Local Shared Object
http://en.wikipedia.org/wiki/Local_Shared_Object
=head2 Flash coders Wiki doc on .Sol File Format
http://sourceforge.net/docman/?group_id=131628
=head2 A Python sol file converter
http://osflash.org/s2x
=head1 AUTHOR
andreas1234567 on perlmonks.org
=cut
Update Tue Dec 18 14:18:29 CET 2007: I acknowledge Anonymous Monk's comment on Readonly. It will be replaced by use constant once released on CPAN.
Update Wed Dec 19 16:50:20 CET 2007 Released on CPAN as Sol::Parser.
Update Thu Dec 20 09:57:22 CET 2007 Released on CPAN as Parse::Flash::Cookie. |