#!/usr/bin/perl
# subsread2.plx
package HTMLMods;

=head1 DESCRIPTION

Alternative to subread.plx - no control flow, just a 'master' sub which
calls each sub in turn to perform the HTML tag/attribute stripping and
alteration.

This application groups ALL the regexps into a single unit:

=over 4

=item 1.

The HTML file is opened and read into an array (try this with a scalar
too!).

=item 2.

A master subroutine is called, which calls the other subs to perform the
HTML reformatting tasks.

=item 3.

Each HTML reformatting sub completes its respective operations on the
lines of the HTML file.

=item 4.

The reformatted array is printed in the DOS window,

=item 5.

OR the changes are written back to the HTML source file.

=back

=head2 ALTERNATIVE FILE OPENING CODE

    my $path = "E:/Documents and Settings/Richard Lamb/My Documents/HTML";
    open (INFILE, "$path/test1InLineCSS.html") or die ("$!: Can't open this file");

=head3 BACKREFS TO REMEDY ENTITY VALUE CHANGE PROBLEM?

=cut

use warnings;
use diagnostics;
use strict;

# Declare and initialise variables.
# Every reformatting sub below edits @htmlFile in place, one element per line.
my @htmlFile;

# The same file is read and then overwritten, so the path is kept in one place.
my $htmlPath = "E:/Documents and Settings/Richard Lamb/My Documents/HTML/test1InLineCSS.html";

# Open HTML test file and read into array.
open(my $inFile, '<', $htmlPath)
    or die("$!: Can't open this file.\n");
@htmlFile = <$inFile>;
close($inFile);

# Run every reformatting step over @htmlFile, in a fixed order.
sub masterCall {
    scrapUnderlineTags();
    scrapBoldTags();
    scrapItalicsTags();
    scrapEmphasiseTags();
    changeFontStyle();
    changeFontSize();
    changeFontColour();
    changeBackColour();
    addTextIndent();
    addWordSpacing();
    addLetterSpacing();
    scrapImageTag();
}

masterCall();

# Replacing original file with reformatted file!
open(my $outFile, '>', $htmlPath)
    or die("$!: Can't rewrite the HTML file.\n");
print {$outFile} @htmlFile;
# Buffered write errors only surface at close, so check it.
close($outFile)
    or die("$!: Can't rewrite the HTML file.\n");

# printHTML();    # sub called to print array in DOS window

# Subroutine definitions

# Removes underline tags in array
sub scrapUnderlineTags {
    # iterates through each element (i.e. HTML line) in array
    foreach my $line (@htmlFile) {
        # Case insensitive, global search; matches both <u> and </u>.
        $line =~ s/<\/?u>//ig;
    }
}

# Removes bold tags (and inline bold font-weight declarations) in array
sub scrapBoldTags {
    foreach my $line (@htmlFile) {
        $line =~ s/<\/?b>//ig;
        $line =~ s/<\/?big>//ig;
        $line =~ s/<\/?strong>//ig;
        $line =~ s/font-weight:\s?bold;?//ig;
    }
}

# Removes italics tags in array
sub scrapItalicsTags {
    foreach my $line (@htmlFile) {
        $line =~ s/<\/?i>//ig;
    }
}

# Removes emphasise tags in array
sub scrapEmphasiseTags {
    foreach my $line (@htmlFile) {
        $line =~ s/<\/?em>//ig;
    }
}

# Change font styles within in-line styles
sub changeFontStyle {
    foreach my $line (@htmlFile) {
        $line =~ s/font-family:\s?Times;/font-family: Arial;/ig;
    }
}

# Change font size within in-line styles
sub changeFontSize {
    foreach my $line (@htmlFile) {
        # The optional ';' is captured and put back so the rewritten
        # declaration stays separated from whatever follows it.
        $line =~ s/font-size:\s?[0-9]{2}pt(;?)/font-size: 14pt$1/ig;
    }
}

# Change font colour within in-line styles
sub changeFontColour {
    foreach my $line (@htmlFile) {
        # The look-behind keeps this from touching 'background-color:' (or
        # any other '...-color:' property) without consuming the character
        # that precedes 'color:'.
        $line =~ s/(?<![\w-])color:\s?#(?:[0-9a-f]{6}|[0-9a-f]{3})(;?)/color: #000000$1/ig;
    }
}

# Changes background colour attributes in array
sub changeBackColour {
    foreach my $line (@htmlFile) {
        $line =~ s/background-color:\s?#(?:[0-9a-f]{6}|[0-9a-f]{3})(;?)/background-color: #FFFFFF$1/ig;
    }
}

# Appends one CSS declaration to the in-line style of every opening $tag.
#   $tag         - element name, e.g. 'p'
#   $declaration - CSS text to append, e.g. 'text-indent: 10px'
# Only tags whose style attribute is the last thing before '>' are changed,
# because the match (and the replacement) end in '">'.
sub appendInlineStyle {
    my ($tag, $declaration) = @_;
    foreach my $line (@htmlFile) {
        $line =~ s/(<\Q$tag\E\b[^>]*\bstyle="[^"]*)">/$1; $declaration">/ig;
    }
}

# NOTE(review): the tag names targeted by the three add* subs below were
# missing from this file as received (anything shaped like an HTML tag had
# been stripped from the patterns). h1/h2/p is a best guess - confirm
# against an intact copy of the script before relying on it.

# Inserts text indent entities within in-line styles
sub addTextIndent {
    appendInlineStyle('h1', 'text-indent: 10px');
    appendInlineStyle('h2', 'text-indent: 10px');
    appendInlineStyle('p',  'text-indent: 10px');
}

# Inserts word spacing entities within in-line styles
sub addWordSpacing {
    appendInlineStyle('h1', 'word-spacing: 30px');
    appendInlineStyle('h2', 'word-spacing: 10px');
    appendInlineStyle('p',  'word-spacing: 10px');
}

# Inserts letter spacing entities within in-line styles
sub addLetterSpacing {
    appendInlineStyle('h1', 'letter-spacing: 3px');
    appendInlineStyle('h2', 'letter-spacing: 2px');
    appendInlineStyle('p',  'letter-spacing: 2px');
}

# Removes image tag in array
sub scrapImageTag {
    foreach my $line (@htmlFile) {
        # NOTE(review): pattern reconstructed (it was empty as received);
        # an empty s/// pattern would silently reuse the last successful
        # pattern instead of matching image tags.
        $line =~ s/<img\b[^>]*>//ig;
    }
}

# Print array to DOS window
sub printHTML {
    print @htmlFile;
}