#! /usr/bin/perl -w
use strict;
use warnings;
use lib "c:/strawberry/perl/site/lib";
use File::Spec;
use HTML::Strip;

# Strip the HTML tags from every plain file in $files_dir and write the
# cleaned text to a file of the same name in $write_dir.

# One parser is reused for all files; eof() resets it between documents.
my $hs = HTML::Strip->new();

# Directory that receives the cleaned (tag-free) copies.
my $write_dir = 'G:\research\sec filings 10k and 10Q\data\filing docs\1993\Clean';

# Directory holding the source files that still contain HTML tags.
my $files_dir  = 'C:\Dwimperl\Perl\1993';

# Fail before touching any input if there is nowhere to write the results.
-d $write_dir or die "output directory '$write_dir' not found";

# opendir gives a handle over the directory's entries; it does not read any
# file contents.
opendir(my $dir_handle, $files_dir)
    or die "failed to open '$files_dir' <$!>";

# readdir returns the next entry NAME (no directory part) on each call and
# undef once the listing is exhausted. The contents of a file are only read
# after it has been opened below.
FILE:
while (defined(my $file = readdir $dir_handle)) {

    # readdir yields bare names, so the directory must be prepended before
    # the entry can be tested or opened; otherwise the name is resolved
    # against the current working directory and nothing is found.
    my $source_path = File::Spec->catfile($files_dir, $file);

    # Skip '.', '..', sub-directories and anything else that is not a
    # plain file.
    next FILE unless -f $source_path;

    my $target_path = File::Spec->catfile($write_dir, $file);

    open my $in_handle, '<', $source_path
        or die "failed to open '$source_path' <$!>";
    open my $out_handle, '>', $target_path
        or die "failed to create '$target_path' <$!>";

    # Feed the document to the parser one line at a time and write out
    # whatever text it returns for that chunk.
    while (my $line = <$in_handle>) {
        my $clean_text = $hs->parse($line);
        print {$out_handle} $clean_text
            or die "failed to write '$target_path' <$!>";
    }

    # Reset the parser only once the whole document has been fed, so tags
    # that span several lines are still handled and no state leaks into
    # the next file.
    $hs->eof;

    # Buffered write errors surface at close, so check it on the output.
    close $out_handle or die "failed to close '$target_path' <$!>";
    close $in_handle  or die "failed to close '$source_path' <$!>";
}

closedir $dir_handle;