#!/bin/perl5
#
# Cross-reference the classes named in a style sheet against the class
# attributes used in a tree of HTML files, and write the result to
# classes.txt in the current directory.
#
# The report flags:
#   widow  - a class named in the style sheet but used by no HTML file
#   orphan - a class used in HTML but not named in the style sheet
#
use strict;
use warnings;
use HTML::TokeParser::Simple;
use File::Find;
use CSS::Tiny;
# use diagnostics;
# use Data::Dumper;

# Root of the site; must end with a slash because the style sheet name is
# appended directly and the prefix is stripped from reported paths.
my $dir = q|c:/web/root/|;

# class name => { relative html path => number of uses in that file }
# Two bookkeeping keys share the inner hash; the 'zz ' prefix makes them
# sort after ordinary lower-case paths in the report:
#   'zz total'  - number of uses across all files
#   'zz orphan' - present when the class is not named in the style sheet
my %class_hash;

# initialise hash with class names from the style sheet
my $css = CSS::Tiny->read( "${dir}cwi.css" )
    or die "can't open style sheet " . ( CSS::Tiny->errstr || $! );

for my $selector ( keys %{$css} ) {

    # A selector may name several classes (e.g. "div.note p.big:hover");
    # each class name is the identifier that follows a dot and ends at
    # the first character that cannot be part of an identifier.
    while ( $selector =~ /\.(-?[_a-zA-Z][\w-]*)/g ) {
        $class_hash{$1}{'zz total'} = 0;
    }
}

find( \&action, $dir );

# generate report
open my $report_fh, '>', 'classes.txt'
    or die "can't open file for output $!";

for my $class ( sort keys %class_hash ) {
    my $total = $class_hash{$class}{'zz total'};

    # next if $total > 10;
    my $widow = $total ? '' : 'widow';
    print {$report_fh} $class, "\t", $widow, "\n";

    for my $path ( sort keys %{ $class_hash{$class} } ) {
        print {$report_fh} "\t$path\t$class_hash{$class}{$path}\n";
    }
}

# Buffered write errors only surface when the handle is closed.
close $report_fh or die "can't close output file $!";
print "done\n";

# File::Find callback: extract class names from each html file.
#
# Called once per directory entry with $_ set to the entry's base name and
# $File::Find::name set to its full path.  Directories whose names start
# with "_" or "pics" are pruned.  For every *.html file, each class named
# in a start tag's class attribute is counted in %class_hash, both per
# file and in the 'zz total' key; classes not already known are marked
# with 'zz orphan'.  Dies if an html file cannot be opened.
sub action {

    # Do not descend into private or picture directories.
    if ( -d $_ and ( /^_/ or /^pics/ ) ) {
        $File::Find::prune = 1;
        return;
    }
    return unless -f $_ and /\.html$/;

    my $tp = HTML::TokeParser::Simple->new( $_ )
        or die "error opening html file $!";

    # Report paths relative to the site root.  The root is quoted and
    # anchored so it is removed as a literal prefix, not used as a pattern.
    ( my $path = $File::Find::name ) =~ s/\A\Q$dir\E//;

    while ( my $t = $tp->get_token ) {

        # Only start tags carry attributes.
        next unless $t->is_start_tag;

        my $classes = $t->return_attr( 'class' );
        next unless $classes;

        # a class attribute may contain more than one class
        for my $class ( split ' ', $classes ) {

            # Must be tested before the counters below autovivify the entry.
            unless ( exists $class_hash{$class} ) {
                $class_hash{$class}{'zz orphan'}++;
            }
            $class_hash{$class}{$path}++;
            $class_hash{$class}{'zz total'}++;
        }
    }
    return;
}