#!/usr/bin/perl
# remove img & anchor tags.plx
#
# Reads an HTML file into memory, strips image tags and anchor tags
# (opening and closing) from every line, and prints the resulting document.
#
# Usage:  perl "remove img & anchor tags.plx" [html-file]
#         With no argument the original hard-coded test file is used.
#
# Original author's working notes:
# 1. No need for file variable yet:
#    open (INFILE, "<".$htmlFile) or die("Can't read source file!\n");
# 2. Alternative: m/<A[^>]+>(.*?)<\/A>/ - Will not remove closing tag
#    though - why?
# 3. Why is interpreter flipping-out over an 'undefined variable', when
#    original regexp, m/<A[^>]+>(.*?)<\/A>/, is known to work.
#    What am I missing?
#
# NOTE(review): in the copy of this script that was reviewed, everything
# between angle brackets had been lost (the pattern literals were '' / ']+>'
# and the readline was "@htmlLines = ;").  The regexps below and in the notes
# above are reconstructed from the comments and sub names - confirm against
# the author's original.

use warnings;
use diagnostics;
use strict;
use HTML::Parser;    # Include this module for future reference - may need to
                     # abandon regexps in favour of parse-trees.

# Default input: the original test page.  Single quotes keep the backslashes
# literal ('\\' is one backslash).
my $DEFAULT_HTML_FILE =
    'E:\\Documents and Settings\\Richard Lamb\\My Documents\\HTMLworkspace\\HTML practice\\My First Page!\\firsttest.html';

# Declare and initialise variables.
# Patterns are precompiled and case-insensitive.  The \b after the tag name
# stops "<a" from also matching <abbr>, <address>, <area>, ... and "<img"
# from matching any longer tag name.
my $pattern1 = qr/<img\b[^>]*>/i;    # image tag
my $pattern2 = qr/<a\b[^>]*>/i;      # opening anchor tag
my $pattern3 = qr/<\/a\s*>/i;        # closing anchor tag
my @htmlLines;

# Optional first command-line argument overrides the default input file.
my $htmlFile = @ARGV ? $ARGV[0] : $DEFAULT_HTML_FILE;

# Open HTML test file and read into array (three-arg open, lexical handle).
open my $inFile, '<', $htmlFile
    or die "Sod! Can't open this file.\n";
@htmlLines = <$inFile>;
close $inFile;

# Test for presence of patterns in HTML file.  The test looks at the document
# lines themselves, not at whether the pattern variable is a true value.
if ( grep { /$pattern1/ } @htmlLines ) {
    scrapImageTag();    # calls to remove image tags
}
else {
    print "No tags matching this pattern within the HTML document.\n";
}

if ( grep { /$pattern2/ || /$pattern3/ } @htmlLines ) {
    scrapAnchorTag();
}
else {
    print "No tags matching this pattern within the HTML document.\n";
}

# Removes image tag elements in array.
# Works in place on @htmlLines; takes no arguments and returns nothing useful.
# Matching is per line, so a tag split across two lines is not removed.
sub scrapImageTag {
    foreach my $line (@htmlLines) {
        # replace with nothing.
        $line =~ s/$pattern1//g;    # global search for pattern
    }
    return;
}

# Removes anchor tag elements in array.
# Deletes the opening and the closing tag separately, leaving the link text
# between them in place.  Works in place on @htmlLines, one line at a time.
sub scrapAnchorTag {
    foreach my $line (@htmlLines) {
        # replace with nothing.
        $line =~ s/$pattern2//g;    # opening tags
        $line =~ s/$pattern3//g;    # closing tags
    }
    return;
}

printHTML();    # prints the reformatted HTML doc

# Prints every line of @htmlLines to STDOUT, in order and unchanged.
sub printHTML {
    print @htmlLines;
    return;
}

print "\n\n";
sleep 2;
print "Success?!\n";