sub do_bondagefiles {
my ($url, $html) = @_;
$_ = $html;
1 while (s@@ @gsi); # lose comments
# Search-and-replace, with @ seperating the search part,
# the replace part, and the search options.
# . These are the HTML comment characters.
# .*? matches anything in between the HTML
# comment characters. The dot means "any character",
# the * means "0 or more of", and the ? means "the
# shortest match", instead of the default of the longest.
# After the @ is the next argument, the replace string.
# It's a single space. So will be
# replaced by a single space.
# After the next @ is the final argument to the RE,
# the options. Options here are g, s, and i. g means
# "global"; if you find the same match multiple times,
# replace all of them. s means treat newlines as regular
# characters, instead of treating them specially. i
# means case insensitive search, which doesn't matter,
# since all of the characters in the search are symbols,
# which don't have a case.
s/[\r\n]+/ /gs;
s@^.*?(, followed by a word boundary,
# followed by the literal string blacktri.gif,
# followed by another word boundary, followed by
# zero or more of any character, followed by the
# end of the string. Replace with an empty string.
# Search treats newline as normal characters, and is case
# insensitive.
s@(]*\bHREF\b)@\n\001\001\001\n$1@gi;
# Save this:
# * The literal string ,
# followed by
# * the literal string HREF, followed by
# * another word boundary.
# Replace this with a newline character, three
# characters with character code 1, another newline,
# and the captured string.
# Case treats newlines as normal characters, and is case
# insensitive.
my @sec1 = split (/\n\001\001\001\n/s);
my @sec2 = ();
foreach (@sec1) {
next if (m/^\s*$/s);
s@^\s*]*?\bHREF=\"([^<>\"]+)\"[^<>]*>\s*(.*?)\s*\s*@@i
+s ||
error ("unparsable entry (url) in $url");
# Search for the beginning of the string, followed
# by
# (taking the shortest match), then another word
# boundary character, then the string HREF="
# Save into register 1:
# * one or more characters which are none of
# <, >, or ".
# Then look for a quote, followed by zero or more
# characters which are neither < nor >, followed by
# a > character, followed by zero or more spaces.
# Save into register 2:
# * Zero or more characters (shortest match)
# followed by zero or more spaces, followed by ,
# followed by zero or more spaces.
# Replace with the empty string.
# Search is case-insensitive, and newlines are treated
# as regular characters.
# *
my $eurl = $1;
# $1 is register 1 from the above RE.
my $title = $2;
# $2 is register 2 from the above RE.
my $date = '';
my $body = $_;
$body =~ s@<[^<>]*>@@g; # lose tags in body
push @sec2, ($eurl, $date, $title, $body);
}
return @sec2;
}