Usage: rhn_channel_download.pl --username --password [ optional options ] required: --username the username you got from RHN network --password the password for RHN Network optional: --channel the RHN channel you wish to download from (if not set, script will present a selection) --architecture your Hardware architecture (if not set, script will present a selection) --destination the directory you want to download to (defaults to '.') --list The file (without path) with the rpm-list (defaults to '.rpmlist' in the destination), will be updated --recipient Recipient for the mail, set this to a valid email address otherwise NO MAILS will be sent --sender Sender of the mail (defauls to some meaningful sender) --verbose More output (without -v, only summary and some warnings will be displayed) --quiet Don't print summary --help This help #### #!/usr/bin/perl -w # # Name: rhn_channel_download.pl # # Description: Download rpm packages from RHN for legal subscriptions that have no access to RHN # # This script will connect to the RHN website, find any (new) packages that belong # to a (selectable) channel, download them, check their md5 sums and then send a # summary mail to a recipient. # # RedHat sponsors the Open Source community with good money and brains, # so please use this only if you have a legal subscription and valid licenses. # Especially the first time you download a channel, it will hammer their servers! # If they find many people doing this over and over again, they might change their # website which will make this script fail! Thanks. # # Author: svenXY (happy PerlMonk) mail me AT gmx dot net # # run with --help for help # # 2008-09-10 use strict; use WWW::Mechanize; use WWW::Mechanize::FormFiller; use URI::URL; use Tie::File; use Digest::MD5; use Getopt::Long; use HTML::TableExtract qw(tree); use Encode; ############### Variables and setup ############## my ($verbose, $quiet, $help, $recipient, $channel, $arch, $username, $password); my $destination_dir = '.'; my $rpmlist = '.rpmlist'; my $sender = 'RHN packages update '; GetOptions ('verbose' => \$verbose, 'quiet' => \$quiet, 'destination:s' => \$destination_dir, 'list:s' => \$rpmlist, 'channel:s' => \$channel, 'architecture:s' => \$arch, 'username:s' => \$username, 'password:s' => \$password, 'help' => \$help, 'sender:s' => \$sender, 'recipient:s' => \$recipient ); $rpmlist = $destination_dir . '/' . $rpmlist; usage() if $help; usage() unless ( defined $username && defined $password ); ##################################### # Tie to rpmlist file and read it in #################################### warn "Warning: RPM list file ($rpmlist) does not yet exist. An empty list file will now be created.\n" unless (-f $rpmlist); tie my @existing_packages, 'Tie::File', $rpmlist or die "Could not tie to $rpmlist: $!"; my %existing_packages = map { $_ => 1 } @existing_packages; #################################### # HTTP to RHN network #################################### my $agent = WWW::Mechanize->new( autocheck => 1 ); my $formfiller = WWW::Mechanize::FormFiller->new(); $agent->env_proxy(); #$agent->get('http://www.redhat.com/rhn'); changed html again... safe_get('https://www.redhat.com/wapps/sso/rhn/login.html?redirect=http%3A%2F%2Frhn.redhat.com%2Frhn%2FYourRhn.do'); $agent->form_number(1) if $agent->forms and scalar @{$agent->forms}; #$agent->form_number(2); { local $^W; $agent->current_form->value('username', $username); }; { local $^W; $agent->current_form->value('password', $password); }; #$agent->form_number(2); $agent->submit(); print "Connected to RHN.\n" if $verbose; ################################################################ # get available Channels, architectures as well as the links that lead there ################################################################ # the second is probably better as it is language independant #$agent->follow_link('text' => 'Channels'); $agent->follow_link( 'url_regex' => qr/channels/ ); my $te = HTML::TableExtract->new( attribs => { 'id' => 'channel-list' }, keep_html => 1 ); $te->parse(decode_utf8 $agent->content()); my $table = $te->first_table_found; my $table_tree = $table->tree; my @channels; foreach my $row (0..$table_tree->maxrow()) { next unless $table_tree->cell($row,0)->as_HTML() =~ /-channel/; (my $rhnchannel = $table_tree->cell($row,0)->as_text() ) =~ s/^.*>\s*(.+?)\s*$/$1/g; if ( defined($channel) ){ next unless $rhnchannel eq $channel; } for (@{ $table_tree->cell($row,1)->extract_links('a') }) { my($link, $element, $attr, $tag) = @$_; my ($channel_id, $channel_arch) = $element->as_text() =~ m/\?cid=(\d+)">(.*)\s$/; if ( defined($arch) ){ next unless $channel_arch eq $arch; } push(@channels, [ $rhnchannel, $channel_arch, $channel_id ] ); } } ################################################################ # ask the user for a channel if more than one has been found ################################################################ my $selection; if (scalar @channels == 1) { $selection = 0; } else { my $index = 0; for (@channels) { print "[", $index++, "] ", join(' - ', $_->[0], $_->[1]), "\n"; } while ( not defined $selection ){ print "Please select a Channel [0 - ", $index-1, "]: "; $selection = ; chomp $selection; $selection = undef if ($selection =~ /[\D]+/ || $selection > $index); } } print "Selected channel: $channels[$selection][0] - $channels[$selection][1] (#$channels[$selection][2])\n" if $verbose; ############################################################################### # HTTP to rpmlist for the selected channel ################################################################################ $agent->get( 'https://rhn.redhat.com/network/software/channels/packages.pxt' . '?upper=100000&filter_string=&lower=1&alphabar_column=NVREA&cid=' . $channels[$selection][2]); print "Retrieving package data.\n" if $verbose; # fixed - thanks to yves-alexis #my @links = $agent->find_all_links( url_regex => qr/details\.pxt\?pid=/ ); my @links = $agent->find_all_links( url_regex => qr/Overview\.do\?pid=/ ); my %avail_packages; my %packages = ( 'new' => [], 'existing' => 0, 'dl_error' => 0, 'md5_error' => 0, ); print "Downloading new packages.\n" if $verbose; my $dl_count = 0; ############################################################################### # work on packages ################################################################################ for my $link ( @links ) { my $url = $link->url_abs; my $rpmname = $link->text; my $pkg_id; ($pkg_id = $url) =~ s/^.*pid=//; $avail_packages{$rpmname} = $pkg_id; ################################################################ # download new packages ################################################################ if (! $existing_packages{$rpmname} ) { $agent->get('https://rhn.redhat.com/network/software/packages/details.pxt?pid=' . $pkg_id); $agent->content =~ m{MD5 Sum:\s+([a-f0-9]+)}s; my $md5 = $1; my $pkg_link = $agent->find_link( text => 'Download Package'); print ++$dl_count . " - Downloading new package: $rpmname ($pkg_id)" if $verbose; $agent->get( $pkg_link->url_abs, ':content_file' => "$destination_dir/$rpmname.rpm" ); if ($agent->success()){ #################################################### # compare md5 sum from webpage with computed one #################################################### if (compare_md5($md5, "$destination_dir/$rpmname.rpm")) { print " ... success!\n" if $verbose; push(@existing_packages, $rpmname); push(@{$packages{'new'}}, $rpmname); } else { print "... MD5 sum mismatch, removing package\n" if $verbose; unlink "$rpmname.rpm"; $packages{'md5_error'}++; } } else { print " ... failure!\n" if $verbose; $packages{'dl_error'}++; } } else { $packages{'existing'}++; } } ################################################################ # update rpmlist with added packages ################################################################ @existing_packages = sort {uc($a) cmp uc($b)} @existing_packages; ## remove stupid empty lines... @existing_packages = grep {!/^$/} @existing_packages; untie @existing_packages; ################################################################ # generate summary, print it to STDOUT and send it as mail ################################################################ my $sum_new_packages = scalar @{$packages{'new'}}; my $output="############### RPM Package Mirror Tool ######################## ### Channel: $channels[$selection][0] - $channels[$selection][1] ### Skriptname: $0 ################################################################ $packages{'existing'} packages have not been downloaded again. $packages{'dl_error'} packages were not properly downloaded. $packages{'md5_error'} packages were not added as their MD5 sum did not match. ################################################################ $sum_new_packages packages have been added: ################################################################\n" . join("\n", sort @{$packages{'new'}}) . "\n" . "################################################################\n"; if ( $verbose || ( $sum_new_packages > 0 && ! $quiet )) { print $output; } if ( $sum_new_packages > 0 && defined($recipient)) { send_mail($output, $recipient, $sender, $sum_new_packages); } ################################################################ # you will only want to run createrepo on exit 0... ################################################################ exit 2 unless $sum_new_packages > 0; exit 0; ############################################################################### sub send_mail { ############################################################################### my ($output, $recipient, $sender, $num) = @_; open(SENDMAIL, "|/usr/lib/sendmail -oi -t -odq") or die "Can't fork for sendmail: $!\n"; print SENDMAIL <<"EOF"; From: $sender To: $recipient Subject: RHN update script added $num packages $output EOF close(SENDMAIL) or warn "sendmail didn't close nicely"; } ############################################################################### sub compare_md5 { ############################################################################### my ($md5, $file) = @_; open(FILE, $file) or do { warn "Can't open '$file' to calculate md5sum: $!"; return 0; }; binmode(FILE); return (Digest::MD5->new->addfile(*FILE)->hexdigest eq $md5)?1:0; } ############################################################################### sub usage { ############################################################################### print< --password [ optional options ] required: --username the username you got from RHN network --password the password for RHN Network optional: --channel the RHN channel you wish to download from (if not set, script will present a selection) --architecture your Hardware architecture (if not set, script will present a selection) --destination the directory you want to download to (defaults to '.') --listfile The file (without path) with the rpm-list (defaults to '.rpmlist' in the destination), will be updated --recipient Recipient for the mail, set this to a valid email address otherwise NO MAILS will be sent --sender Sender of the mail (defauls to some meaningful sender) --verbose More output (without -v, only summary and some warnings will be displayed) --quiet Don't print summary --help This help EOH exit 0; }