package My::Hadoop::Example::Wordcount;
use Moose::Role;
# Mapper step: split one line of input into words and emit a ( word, 1 )
# pair for every word found.
#
#   $self - the consuming object; must provide emit( $key, $value )
#   $line - one line of input text
#
# The line is split on runs of non-word characters (\W+).  When the line
# begins with a non-word character, split() produces a leading empty
# string; that artefact is skipped so no count is emitted for an empty word.
# An undefined line emits nothing.
sub map
{
    my ( $self, $line ) = @_;

    return unless defined $line;

    for my $word ( split /\W+/, $line )
    {
        next if $word eq '';    # leading-separator artefact, not a word
        $self->emit( $word => 1 );
    }

    return;
}
# Reducer step: add up every value supplied for a key and emit the total.
#
#   $self           - the consuming object; must provide emit( $key, $value )
#   $key            - the key the values belong to
#   $value_iterator - object offering has_next() and next()
#
# The iterator is drained completely; an iterator with no values yields a
# total of 0.
sub reduce
{
    my ( $self, $key, $value_iterator ) = @_;

    my $total = 0;
    $total += $value_iterator->next() while $value_iterator->has_next();

    $self->emit( $key, $total );
}
# Combiner step: pre-aggregate the values seen for a key by summing them,
# then emit the key with that partial sum.
#
#   $self           - the consuming object; must provide emit( $key, $value )
#   $key            - the key the values belong to
#   $value_iterator - object offering has_next() and next()
sub combine
{
    my ( $self, $key, $value_iterator ) = @_;

    my $partial_sum = 0;
    while (1)
    {
        # Stop as soon as the iterator reports that it is exhausted.
        last unless $value_iterator->has_next();
        $partial_sum += $value_iterator->next();
    }

    $self->emit( $key, $partial_sum );
}
# Mapper class: a Moose class composed from the Hadoop::Streaming::Mapper
# role and the My::Hadoop::Example::Wordcount role.
package My::Hadoop::Example::Wordcount::Mapper;
use Moose;
# Role names are passed as quoted strings: "use Moose" turns on strict, and
# strict subs rejects bareword package names used as list arguments.
with 'Hadoop::Streaming::Mapper', 'My::Hadoop::Example::Wordcount';
# Combiner class: a Moose class composed from the Hadoop::Streaming::Combiner
# role and the My::Hadoop::Example::Wordcount role.
# NOTE(review): this package name does not follow the
# My::Hadoop::Example::Wordcount::<Step> pattern used by the Mapper and
# Reducer classes; it is left unchanged here so existing callers keep
# working -- confirm whether a rename is wanted.
package My::Hadoop::Example::Combiner::Wordcount::Mapper;
use Moose;
# Role names are passed as quoted strings: "use Moose" turns on strict, and
# strict subs rejects bareword package names used as list arguments.
with 'Hadoop::Streaming::Combiner', 'My::Hadoop::Example::Wordcount';
# Reducer class: a Moose class composed from the Hadoop::Streaming::Reducer
# role and the My::Hadoop::Example::Wordcount role.
package My::Hadoop::Example::Wordcount::Reducer;
use Moose;
# Role names are passed as quoted strings: "use Moose" turns on strict, and
# strict subs rejects bareword package names used as list arguments.
with 'Hadoop::Streaming::Reducer', 'My::Hadoop::Example::Wordcount';
1;
####
#!/usr/bin/perl
# Driver script for the map step: loads the word-count module and hands
# control to the mapper class.
# Assumes the module is installed as My/Hadoop/Example/Wordcount.pm (the
# name of its first package) -- TODO confirm; run() is presumably supplied
# by the Hadoop::Streaming::Mapper role.
use strict;
use warnings;
use My::Hadoop::Example::Wordcount;
My::Hadoop::Example::Wordcount::Mapper->run();
####
#!/usr/bin/perl
# Driver script for the combine step: loads the word-count module and hands
# control to the combiner class, using the package name exactly as the
# module declares it.
# Assumes the module is installed as My/Hadoop/Example/Wordcount.pm (the
# name of its first package) -- TODO confirm; run() is presumably supplied
# by the Hadoop::Streaming::Combiner role.
use strict;
use warnings;
use My::Hadoop::Example::Wordcount;
My::Hadoop::Example::Combiner::Wordcount::Mapper->run();
####
#!/usr/bin/perl
# Driver script for the reduce step: loads the word-count module and hands
# control to the reducer class.
# Assumes the module is installed as My/Hadoop/Example/Wordcount.pm (the
# name of its first package) -- TODO confirm; run() is presumably supplied
# by the Hadoop::Streaming::Reducer role.
use strict;
use warnings;
use My::Hadoop::Example::Wordcount;
My::Hadoop::Example::Wordcount::Reducer->run();
####
# Submit the job through Hadoop Streaming, using the my_mapper, my_combiner
# and my_reducer programs for the three steps.
# streaming_jar_name must hold the path of the streaming jar; the expansion
# is quoted so a path containing spaces stays one argument, and the command
# aborts with a message if the variable is unset or empty.
hadoop \
jar "${streaming_jar_name:?set streaming_jar_name to the Hadoop streaming jar}" \
-D mapred.job.name="my hadoop example" \
-input my_input_file \
-output my_output_hdfs_path \
-mapper my_mapper \
-combiner my_combiner \
-reducer my_reducer