[Updated] Statistician in my garbage...

In "Statistician in my garbage...", monk larsen showed code that randomly combines texts and images from your browser's cache, giving you a snapshot of your browsing behavior. That code relied on the filename to determine the file type, so it did not work with my Firefox cache, because Firefox squashes filenames into something else. I updated the code to use File::MMagic to determine the file type instead.

#!/usr/local/bin/perl -w


use strict;


# Digs in your browser's cache 
# like a statistician in your trashcan...

package Lurker;

use File::Find;
use File::MMagic;


# File-scoped index of cached files, keyed by category name.
# Each category starts out as its own empty list of paths.
my $cache = { map { $_ => [] } qw( IMAGES DOCS ) };

# Walk $dir recursively and index the files found there by detected type.
#
# Parameters:
#   $dir - root directory of the browser cache to scan.
#
# Side effects:
#   Appends the full path of every file whose detected type matches
#   "image/" to $cache->{IMAGES} and of every file whose type matches
#   "text/html" to $cache->{DOCS}. Progress is reported on STDERR.
sub lurk
{
    my $dir = shift;
    my $mm  = File::MMagic->new;   # direct method call, not "new File::MMagic"

    print STDERR "Reading cache...";

    find(
        {
            # Without no_chdir, find() changes into each directory while
            # $File::Find::name stays relative to the starting point, so a
            # relative $dir would hand unreadable paths to the type check.
            no_chdir => 1,
            wanted   => sub {
                my $path = $File::Find::name;

                # Only plain files have content to sniff; this skips
                # directories and other non-file entries.
                return unless -f $path;

                my $type = $mm->checktype_filename($path);
                return unless defined $type;

                push @{ $cache->{ IMAGES } }, $path if $type =~ m/image\//;
                push @{ $cache->{ DOCS } },   $path if $type =~ m/text\/html/;
            },
        },
        $dir
    );

    print STDERR "OK!\n";
}

# Return one randomly chosen entry from the named category of $cache.
#
# Parameters:
#   $what - category key, e.g. 'IMAGES' or 'DOCS'.
#
# Returns the selected path; for an empty category the lookup yields undef.
sub pick_random
{
    my ($what) = @_;

    my $pool  = $cache->{ $what };
    my $count = @{ $pool };

    # rand() gives a fractional value below $count; the array subscript
    # truncates it to an integer index.
    return $pool->[ rand $count ];
}




package My_HTML_Parser;

use base 'HTML::Parser';

# Start-tag callback (this package inherits from HTML::Parser): prints the
# tag's original text, with the src of every <img> swapped for a randomly
# picked cached image.
#
# Parameters:
#   $self     - the parser object (not used in this sub)
#   $tag      - tag name; only 'img' gets special treatment
#   $attr     - hash ref of the tag's attributes; only 'src' is read
#   $attrseq  - not used in this sub
#   $origtext - original text of the tag; this is what gets printed
#
# Returns the result of print.
sub start
{
    my $self = shift;
    my ($tag, $attr, $attrseq, $origtext) = @_;

    if ($tag eq 'img') {
        my $orig_src = $attr->{'src'};

        # An <img> without a usable src has nothing to replace. An empty
        # pattern must be avoided: s//.../ would reuse the last successful
        # regex instead of matching nothing.
        if (defined $orig_src && length $orig_src) {
            my $new_src = Lurker::pick_random( 'IMAGES' );

            # \Q...\E makes the URL match literally; characters such as
            # "?", "+", "." or "(" are common in URLs and would otherwise
            # be interpreted as regex syntax. Leave the tag untouched when
            # no replacement image is available.
            $origtext =~ s/\Q$orig_src\E/$new_src/ if defined $new_src;
        }
    }

    print $origtext;
}

# Text callback: writes the text chunk it receives to the currently
# selected output handle, unchanged.
#
# Parameters:
#   $self - the parser object (not used in this sub)
#   $text - the text to print
sub text
{
    my ($self, $text) = @_;

    print $text;
}

# End-tag callback: prints a closing tag built from the tag name it is given.
#
# Parameters:
#   $self - the parser object (not used in this sub)
#   $tag  - name of the tag being closed
sub end
{
    my ($self, $tag) = @_;

    my $closing = "</$tag>";
    print $closing;
}



package main;

# Cache directory to scan: the first command-line argument when given,
# otherwise the default profile path. (The path is written on one line;
# a wrapped literal would embed a newline and a "+" in the directory name.)
my $cache_directory = @ARGV
    ? $ARGV[0]
    : '/home/rshekhar/.mozilla/firefox/jg2e8cd7.default/Cache';

Lurker::lurk( $cache_directory );

my $doc = Lurker::pick_random('DOCS');

# pick_random yields undef when no HTML document was indexed; stop with a
# clear message instead of trying to parse an undefined filename.
die "No HTML documents found in $cache_directory\n" unless defined $doc;

print STDERR "Now parsing $doc...\n";

# Named $parser rather than $a, which is the special variable used by sort.
my $parser = My_HTML_Parser->new;
$parser->parse_file( $doc )
    or die "Cannot parse $doc: $!\n";
[download]

Tip: To find your Firefox cache location, type about:cache in the location bar and look for the "Cache Directory" entry.

Comment on [Updated] Statistician in my garbage... Download Code