# Index one HTML document: record its <title> text, strip the markup, and
# break the remaining text into words.
#
# Reads:  $whole_file (presumably the slurped contents of the document --
#         confirm against the code that fills it) and $file (the key used
#         for this document in %file_index).
# Writes: $file_index{$file}{TITLE}, $whole_file (tags removed), @words.

# Capture the text between <title> and </title> rather than everything up to
# </html>. /s lets "." cross newlines so a title that spans several lines is
# still caught, /i accepts any tag case, and the non-greedy ".*?" stops at
# the first closing tag. TITLE is set to undef when the document has no
# title, instead of picking up a stale $1 left over from an earlier match.
$file_index{$file}{TITLE}
    = $whole_file =~ m{<title\b[^>]*>(.*?)</title\s*>}is ? $1 : undef;

# Remove every tag. The negated class [^>] already matches newlines, so tags
# that wrap across lines are stripped without needing /s here.
$whole_file =~ s/<[^>]*>//g;

# split ' ' breaks on any run of whitespace (spaces, tabs, newlines) and
# discards leading whitespace, so @words never contains empty strings.
@words = split ' ', $whole_file;