#!/usr/bin/perl
# remove img & anchor tags.plx
# Program will read in an HTML file, remove the img and anchor tags, and print out the entire doc.
# 1. No need for file variable yet: open (INFILE, "<".$htmlFile) or die("Can't read source file!\n");
# 2. Alternative: m/<A\s+HREF=[^>]+>(.*?)<\/A>/  - Will not remove closing tag though - why?
# 3. Why is interpreter flipping-out over an 'undefined variable', when
#    original regexp, m/<A\s+HREF=[^>]+>(.*?)<\/A>/, is known to work. What am I missing?

use warnings;
use diagnostics;
use strict;
use HTML::Parser;    # Include this module for future reference - may need to abandon
                     # regexps in favour of parse-trees.

# Declare and initialise variables.
# The patterns are plain strings that the scrap* subs interpolate into
# s///ig, so case-insensitivity is supplied at the point of use.
#
# $pattern1 - one complete <IMG ...> tag.  [^>]* stops at the tag's own
#             closing '>'; a greedy .* ran on to the LAST '>' of the line
#             and deleted whatever markup followed the image on that line.
# $pattern2 - an opening anchor tag whose first attribute is HREF.
# $pattern3 - a closing anchor tag.
# @htmlLines - the document, one element per input line.  Matching is done
#             per element, so a tag split across lines is not recognised.
my $pattern1 = '<IMG\b[^>]*>';
my $pattern2 = '<A\s+HREF\s*=[^>]+>';
my $pattern3 = '</A>';
my @htmlLines;

# Open HTML test file and read into array.
# Three-argument open with a lexical handle: the path (which contains spaces
# and a '!') is treated purely as a file name, and the handle is not a
# package-wide bareword.  On failure the message names the file and includes
# $! so the operating system's reason is visible.
my $htmlFile = "E:\\Documents and Settings\\Richard Lamb\\My Documents\\HTMLworkspace\\HTML practice\\My First Page!\\firsttest.html";
open(my $inFile, '<', $htmlFile)
  or die "Sod! Can't open this file ($htmlFile): $!\n";
@htmlLines = <$inFile>;    # list context: one array element per line
close($inFile);

# Test for presence of patterns in HTML file.
# Each check searches the loaded lines for the pattern, case-insensitively to
# match how the scrap* subs apply it.  Testing the pattern string itself was
# always true, which left the else branches unreachable.
# Diagnostics go to STDERR so they do not end up inside the printed document.
if (grep(/$pattern1/i, @htmlLines))
{
  scrapImageTag(); # calls to remove image tags
}
else
{
  print STDERR "No tags matching this pattern within the HTML document.\n";
}

# Opening and closing anchor tags are stripped independently of each other,
# so run the anchor pass when either kind occurs anywhere in the document.
if (grep(/$pattern2/i || /$pattern3/i, @htmlLines))
{
  scrapAnchorTag();
}
else
{
  print STDERR "No tags matching this pattern within the HTML document.\n";
}

# Strips every match of $pattern1 (the image-tag pattern) out of each element
# of @htmlLines, editing the array in place.  Takes no arguments.
sub scrapImageTag
{
  # $_ aliases each array element in turn, so the substitution edits the
  # stored line directly; i = ignore case, g = every occurrence on the line.
  s/$pattern1//gi for @htmlLines;
}

# Strips anchor markup out of each element of @htmlLines in place: first the
# opening tags matched by $pattern2, then the closing tags matched by
# $pattern3.  The text between the tags is left alone.  Takes no arguments.
sub scrapAnchorTag
{
  for my $html (@htmlLines)
  {
    # Same flags for both passes: i = ignore case, g = every occurrence.
    foreach my $tagPattern ($pattern2, $pattern3)
    {
      $html =~ s/$tagPattern//gi;
    }
  }
}

# Write out the document held in @htmlLines, after the tag-removal steps above.
printHTML();

# Prints the document held in @htmlLines, one stored line per print call, to
# the currently selected output handle.  Takes no arguments.
sub printHTML
{
  print $_ for @htmlLines;
}

# Trailer: two newlines, a two-second pause, then a closing message, all
# written with plain print, i.e. to the same handle as the document itself.
# NOTE(review): if the output is redirected to a file, the blank lines and
# "Success?!" become part of the saved HTML; presumably the pause is only
# there to keep a console window readable - confirm before relying on it.
print "\n\n";
sleep 2;
print "Success?!\n";