open(IN1, '<', $input1) or die "Can't read source file $input1 : $!\n"; my $minlength = 1<<20; my %distrbtn; while() { next if />/; chomp; my $len = length(); $distrbtn{$len}++; $minlength = $len if ( $minlength > $len ); } close IN1; $minlength -= 3; open(IN2, '<', $input2) or die "Can't read source file $input2 : $!\n"; my $header; my @source; my %source_lengths; while() { chomp; if ( />/ ) { $header = $_; } elsif ( length() >= $minlength ) { push @source, $header; push @source, $_; push @{$source_lengths{length()}}, $#source; } } close IN2; ##

##

my $max_source_length = ( sort {$b<=>$a} keys %source_lengths )[0];

for my $key ( sort {$a<=>$b} keys %distrbtn ) {
    my $size = $key - 3;
    my $freq = $distrbtn{$key};

# find the first set of source strings of equal or greater size:

    my $source_key = $key;
    while  ( $source_key <= $max_source_length and
             not exists( $source_lengths{$source_key} )) {
        $source_key++;
    }
    if ( $source_key > $max_source_length ) {
        die "We can't do this: strings from $input2 aren't long enough";
    }
    ...
}

##

##

    my @usable_sources = @{$source_lengths{$source_key}};
    printf "for an input1 string of length %d, we can choose from %d input2 strings\n", $size, scalar @usable_sources;

# in case we want to add more sources that happen to be longer:
    $source_key++;
    while ( $source_key <= $max_source_length and
            scalar @usable_sources < $freq ) {
        push @usable_sources, @{$source_lengths{$source_key}} if exists($source_lengths{$source_key};
    }
    if ( $freq > @usable_sources ) {
        warn "We ran short of desired frequency for length $size\n";
    elsif ( $freq < @usable_sources ) {
        # do something to randomly remove items from @usable_sources...
    }
    for my $offset ( @usable_sources ) {
        my $header = $source[$offset-1];
        my $string = $source[$offset];
        # do whatever...
    }