#!/usr/local/bin/perl -w
use strict;
use warnings;

# Digs in your browser's cache
# like a statistician in your trashcan...
#
# Usage: <script> [cache_directory]
#
# Scans a browser cache directory, picks one cached HTML document at random
# and prints it to STDOUT with the src of every <img> tag replaced by the
# path of a randomly chosen cached image. Progress messages go to STDERR.

package Lurker;

use File::Find;
use File::MMagic;

# Paths of the cached files found by lurk(), bucketed by detected MIME type.
my $cache = {
    IMAGES => [],
    DOCS   => [],
};

# lurk($dir)
#
# Walks $dir recursively and records every regular file whose MIME type (as
# reported by File::MMagic) contains "image/" under IMAGES, and every one
# whose type contains "text/html" under DOCS.
sub lurk {
    my ($dir) = @_;
    my $mm = File::MMagic->new;

    print STDERR "Reading cache...";
    find(
        {
            # no_chdir keeps $File::Find::name resolvable from the current
            # directory, so a relative $dir works as well as an absolute one.
            no_chdir => 1,
            wanted   => sub {
                my $path = $File::Find::name;
                return unless -f $path;    # directories etc. are never content

                my $type = $mm->checktype_filename($path);
                return unless defined $type;

                push @{ $cache->{IMAGES} }, $path if $type =~ m{image/};
                push @{ $cache->{DOCS} },   $path if $type =~ m{text/html};
            },
        },
        $dir
    );
    print STDERR "OK!\n";
    return;
}

# pick_random($what)
#
# Returns one randomly chosen path from the bucket named $what ('IMAGES' or
# 'DOCS'). Returns nothing (undef in scalar context) when the bucket is
# unknown or empty, so callers can detect "nothing to pick".
sub pick_random {
    my ($what) = @_;

    my $items = $cache->{$what};
    return unless defined $items && @{$items};

    return $items->[ int rand scalar @{$items} ];
}

package My_HTML_Parser;

# Uses HTML::Parser's subclassing interface: the parser calls the start(),
# text() and end() methods below as it walks the document.
use base 'HTML::Parser';

# start($tag, $attr, $attrseq, $origtext)
#
# Prints the start tag. For <img> tags the src value is first swapped for a
# random cached image, when both the original src and a replacement exist.
sub start {
    my ( $self, $tag, $attr, $attrseq, $origtext ) = @_;

    if ( $tag eq 'img' ) {
        my $orig_src = $attr->{src};
        my $new_src  = Lurker::pick_random('IMAGES');

        # \Q...\E: the src is arbitrary page data (URLs routinely contain
        # ?, +, ., parentheses) and must be matched literally, not as a regex.
        # The length guard avoids an empty pattern, which Perl would treat as
        # "reuse the last successful pattern".
        # NOTE(review): $attr values may be entity-decoded by HTML::Parser, so
        # a src containing e.g. "&amp;" may not match the raw tag text -- in
        # that case the tag is printed unchanged.
        if ( defined $orig_src && length $orig_src && defined $new_src ) {
            $origtext =~ s/\Q$orig_src\E/$new_src/;
        }
    }

    print $origtext;
    return;
}

# text($text)
#
# Prints document text unchanged.
sub text {
    my ( $self, $text ) = @_;
    print $text;
    return;
}

# end($tag, $origtext)
#
# Prints the closing tag. The previous implementation printed an empty
# string, which silently dropped every end tag from the output document.
sub end {
    my ( $self, $tag, $origtext ) = @_;
    print defined $origtext ? $origtext : "</$tag>";
    return;
}

package main;

# The cache directory may be given as the first command-line argument; the
# original hard-coded location remains the default.
my $default_cache_directory =
  '/home/rshekhar/.mozilla/firefox/jg2e8cd7.default/Cache';
my $cache_directory = @ARGV ? $ARGV[0] : $default_cache_directory;

-d $cache_directory
  or die "Cache directory '$cache_directory' does not exist\n";

Lurker::lurk($cache_directory);

my $doc = Lurker::pick_random('DOCS');
defined $doc
  or die "No HTML documents found in '$cache_directory'\n";
print STDERR "Now parsing $doc...\n";

my $parser = My_HTML_Parser->new;
defined $parser->parse_file($doc)
  or die "Cannot parse '$doc': $!\n";