Hi!
It seems this doesn't work:
# Caution: inside a double-quoted string "\s" is not a regex escape; Perl drops
# the backslash, so this string actually holds:  hrefs*=s*"?.*"
# The greedy .* also runs to the last double quote on the line.
my $regex = "href\s*=\s*\"?.*\"";
I think it should be
# qr{} keeps \s as a regex escape, and the lazy .*? stops at the first
# closing double quote instead of the last one on the line.
my $regex = qr{href\s*=\s*".*?"};
Note the use of qr// and the position of the question mark.
So perhaps your code should look something like this:
use strict;
use warnings; # this is also useful
# Matches an href attribute whose value is wrapped in double quotes; the
# lazy quantifier keeps each match confined to a single attribute.
my $regex = qr{href\s*=\s*".*?"};

# Text substituted for every href attribute that is found.
my $sub = 'href="#"';

# Rewrite the DATA section one line at a time and echo the result.
while ( my $line = <DATA> ) {
    $line =~ s/$regex/$sub/g;
    print $line;
}
__DATA__
afjalsdfj href="asfasdfa" afdsas href="akjshfakjsd"
href = "ajsfhaklj"
Update: here's another approach, using HTML::Parser:
use strict;
use warnings;
use HTML::Parser;
use HTML::Entities;
# Read the whole document from the DATA section into one string.
my $html = join '', <DATA>;

# Build a parser that echoes every event's source text unchanged, except:
#   - comments, which are dropped (an empty-string handler ignores the event);
#   - start tags, which go through start() so href values can be rewritten.
my $p = HTML::Parser->new(
    default_h => [ sub { print shift }, 'text' ],
    comment_h => [""],
    start_h   => [ \&start, 'tag,attr,attrseq,text' ],
);

$p->parse($html);

# Signal end of document so any text the parser is still buffering is
# flushed to the handlers; without this, trailing text can be lost.
$p->eof;
# Start-tag handler for HTML::Parser; prints the tag to STDOUT.
#
# Arguments arrive in the order requested by the 'tag,attr,attrseq,text'
# argspec:
#   $tag     - name of the tag
#   $attr    - hash ref mapping attribute names to their values
#   $attrseq - array ref of attribute names in their original order
#   $text    - the tag exactly as it appeared in the input
#
# A tag without an href attribute is printed verbatim. A tag with one is
# rebuilt with href set to "#", every attribute value double-quoted and
# entity-encoded, and the attributes kept in their original order.
sub start {
    my ( $tag, $attr, $attrseq, $text ) = @_;

    # Nothing to rewrite: emit the original source text untouched.
    if ( !exists $attr->{href} ) {
        print $text;
        return;
    }

    $attr->{href} = "#";

    print "<$tag";
    # Iterate over $attrseq rather than keys %$attr to keep attribute order.
    print " $_=\"", encode_entities( $attr->{$_} ), '"' for @{$attrseq};
    print ">";
    return;
}
__DATA__
<html><body>
<h1>title</h1>
<a href="foo" class="class">link</a>
<p>some <a href="link">text</a> and <a href="another_link">more</a>
</body></html>