# Three independent illustrative snippets, separated by "####" lines.
# A line holding only "...;" marks code the author elided; it is written as
# a yada-yada statement (Perl 5.12+) so the file parses, and it dies with
# "Unimplemented" until it is replaced by real code.
# Bareword filehandles are kept because the elided code may refer to them.

# Snippet 1: read a file line by line while appending new lines to that same
# file.  The handle is opened read/write ("+<"), so every switch between
# reading and writing goes through seek().
# NOTE(review): the loop condition had degraded to an empty "while ()", which
# loops forever without ever setting $_; the <LINKS> readline is restored.
open( LINKS, "+<", "filename" ) or die "can't open filename: $!";
while (<LINKS>) {
    my $lastread = tell LINKS;    # keep track of where you are
    seek LINKS, 0, 2;             # jump to end-of-file (whence 2 = from end)
    ...;                          # do stuff with current value of $_, including:
    print LINKS "another line of data\n";
    ...;                          # when all done adding updates at the end...
    seek LINKS, $lastread, 0;     # jump back for next read (whence 0 = from start)
}
####
# Snippet 2: skip any url that is already a key of %fetched.
...;
next if exists( $fetched{$url} );
...;
####
# Snippet 3: repeat passes until list1 is empty.  Each pass reads urls from
# list1, saves every page not already on disk, and writes the urls found on
# those pages to list2; list1 is then rebuilt from the unique lines of list2.
# NOTE(review): the text showed only 'open( INP, "list2" )', never opened OUT
# and never read list1.  The two opens below are reconstructed on the
# assumption that everything between "<list1" and ">list2" was lost together
# with the angle brackets -- confirm against the author's original.
while ( -s "list1" ) {
    open( INP, "<", "list1" ) or die "can't read list1: $!";
    open( OUT, ">", "list2" ) or die "can't write list2: $!";
    while (<INP>) {
        chomp;
        # transform url into a safe file name
        # (update: added backslash to the character set)
        ( my $pagefile = $_ ) =~ s{[ \\/?&#!;]+}{_}g;
        next if ( -e $pagefile );    # skip if we already have it
        ...;                         # fetch the page, extract links if any
        ...;
        foreach my $link (@links) {
            ...;                     # work out the full url
            ...;
            print OUT $url, "\n";
        }
        # save current page to its own file; three-arg open keeps characters
        # in the name from being read as an open mode
        open( PAGE, ">", $pagefile ) or die "can't write $pagefile: $!";
        print PAGE $content;
        close PAGE or die "can't close $pagefile: $!";
    }
    close INP;
    # list2 must be flushed to disk before sort reads it
    close OUT or die "can't close list2: $!";
    # rewrite list1 with only the unique strings in list2; a failed sort would
    # leave list1 empty and end the loop as if the work were finished
    system("sort -u list2 > list1") == 0
        or die "sort -u list2 > list1 failed (wait status $?)";
}
print "list1 is empty now. We must be done.\n";