#!perl use strict; use warnings; use Cache::FileCache; # use Data::Dump::Streamer; use DBM::Deep; use HTML::FormatText; use Mail::Sendmail 'sendmail'; use Params::Validate ':all'; use WWW::Mechanize; use XML::RSS; use vars ( '$DB', '$CACHE', '%CONFIG' ); main( sendmail => { smtp => '...', From => '...', To => '...' } ); exit; sub main { my %p = @_; initialize( %p ); do_jobs_perl_org( category => 'MN perl', url => 'http://jobs.perl.org/rss/standard.rss?limit=20', constraints => [ sub { /minnesota/i } ] ); do_jobs_perl_org( category => 'Tele perl', url => 'http://jobs.perl.org/rss/telecommute.rss?limit=20' ); do_jobs_perl_org( category => 'Domino perl', url => 'http://jobs.perl.org/rss/standard.rss?limit=20', constraints => [ sub { /\b(?:domino|lotus|notes)\b/i } ] ); # do_mn_job_bank( category => 'MN perl', # constraints => [ sub { /\bperl\b/i } ], # username => '...', # password => '...', # miles => 25, # zip => ..., # keyword => 'perl' ); #do_mn_job_bank( category => 'MN Domino', # constraints => [ sub { /\b(?:domino|lotus\s+notes)\b/i } ], # username => '...', # password => '...' ); return; } sub initialize { my %p = validate( @_, { test => { type => SCALAR, optional => 1, default => 0 }, cache_namespace => { type => SCALAR, optional => 1, default => '1 week' }, cache_autopurge_interval => { type => SCALAR, optional => 1, default => '1 hour' }, db_filename => { type => SCALAR, optional => 1, default => "$ENV{HOME}/.jobwatcher.db" }, wget_expiration => { type => SCALAR, optional => 1, default => '1 hour' }, sendmail => { type => HASHREF } } ); $DB = DBM::Deep->new( $p{db_filename} ); $CACHE = Cache::FileCache->new( { namespace => delete $p{cache_namespace}, autopurge_interval => delete $p{cache_autopurge_interval} } ); @CONFIG{keys %p} = values %p; return; } sub do_mn_job_bank { my %p = validate( @_, { category => { type => SCALAR }, constraints => { type => ARRAYREF, default => [ sub { 1 } ] }, username => { type => SCALAR }, password => { type => SCALAR }, zip => { type => SCALAR }, miles => { type => SCALAR }, keyword => { type => SCALAR },} ); my $www = WWW::Mechanize->new; # Log in $www->get( 'http://www.mnworks.org/jsli.cfm' ); $www->submit_form( fields => { li_user_name => $p{username}, li_password => $p{password} }, button => 'Submit' ); # Search $www->get( 'http://www1.mnworks.org/jobseeker/js_srch_pg.cfm' ); $www->submit_form( fields => { i_geo_search_flag => 'zip_yes', zip_miles => $p{miles}, zip_code => $p{zip}, Submit => 'More Search Options' } ); print $www->content; $www->submit_form( fields => { # i_geo_search_flags => 'zip_yes', # zip_miles => $p{miles}, # zip_code => $p{zip}, i_job_description => $p{keyword} } ); } sub do_jobs_perl_org { my %p = validate( @_, { category => { type => SCALAR }, url => { type => SCALAR }, constraints => { type => ARRAYREF, default => [ sub { 1 } ] } } ); my $rssurl = $p{url}; my $feed = rss_feed_from_url( url => $rssurl ); for my $job ( @{$feed->{items}} ) { my $url = $job->{link}; my $title = $job->{title}; my $desc = html2text( html => get( url => $url ) ); # Remove spaces @ the start of lines { my %spaces; ++ $spaces{$_} for map length(), $desc =~ /^( +)/gm; my $spaces_to_remove = ( sort { $spaces{$b} <=> $spaces{$a} } keys %spaces )[0]; my $space_to_remove = " " x $spaces_to_remove; $desc =~ s/^$space_to_remove//mg; } # Remove boilerplate & navigation at the top $desc =~ s/ (?s:.+?) (.+)[\r\n]+ [ \t]*=+[ \t]*[\r\n]+ /$1\n/x; # Now collapse the description fields $desc =~ s((.+)(?=^Description:)){ # Now I have a handle on just the key/value area ahead of the # description local $_ = $1; # Collapse field lines together s/^(.+:)[ \t]*[\r\n]+ (.+)/$1 $2/mgx; # Remove empty lines s/^[ \t]*[\r\n]+//gm; # Remove trailing / leading whitespace s/^\s+//; s/\s+$//; # Add in the \n I want. "$_\n"; }sem; # Finally remove the boilerplate from the botton. $desc = reverse $desc; $desc =~ s/(?s:.+?) ^ [ \t]* -+ [ \t]* [\r\n]+ ^ [ \t]* [\r\n]+ //xsm; # $desc =~ s/.+?^[ \t]*-{30,}[ \t]*[\r\n]+^\s+//s; $desc = reverse $desc; $desc =~ s/\s+$//; $desc .= "\n"; if ( passes_constraints( constraints => $p{constraints}, title => $title, desc => $desc ) ) { notify( id => $url, title => $title, desc => "$p{category} $desc" ); } } return; } sub passes_constraints { my %p = validate( @_, { constraints => { type => ARRAYREF }, title => { type => SCALAR }, desc => { type => SCALAR } } ); my @constraints = @{delete $p{constraints}}; if ( not @constraints ) { @constraints = sub { 1 }; } local $_ = "$p{title}\n$p{desc}"; # When if ( /Job no longer in database/ ) { return 0; } for my $constraint ( @constraints ) { if ( $constraint->() ) { return 1; } } return 0; } sub notify { my %p = validate( @_, { id => { type => SCALAR }, title => { type => SCALAR }, desc => { type => SCALAR } } ); return if $DB->{notified}{$p{id}}; if ( $CONFIG{test} ) { print "NOTIFY $p{id}\n $p{title}\n"; return; } $DB->{notified}{$p{id}} = 1; sendmail( %{$CONFIG{sendmail}}, Subject => $p{title}, Text => $p{desc} ); } sub rss_feed_from_url { my %p = validate( @_, { url => { type => SCALAR } } ); my $url = delete $p{url}; my $parser = XML::RSS->new; $parser->parse( get( url => $url ) ); return $parser; } sub html2text { my %p = validate( @_, { html => { type => SCALAR } } ); return HTML::FormatText->format_string( $p{html} ); } sub get { my %p = validate( @_, { url => { type => SCALAR } } ); my $url = delete $p{url}; my $content = $CACHE->get( $url ); if ( not defined $content ) { my $www = WWW::Mechanize->new; $www->get( $url ); $content = $www->content; $CACHE->set( $url, $content, $CONFIG{wget_expiration} ); } return $content; }