##
Sunday
####
Sunday
####
# List-context /g match: @elements receives what the pattern in $XML_SPE
# matched across $xml. Both $xml and $XML_SPE come from outside this one-line
# excerpt; the pattern variable is named without a package qualifier here.
my @elements = $xml =~ /$XML_SPE/g;
####
# List-context /g match: @elements receives what the pattern matched across
# $xml. The pattern is named as $::XML_SPE, i.e. the variable XML_SPE in
# package main written with an explicit package qualifier, which is a form
# that `use strict` accepts without a prior declaration.
my @elements = $xml =~ /$::XML_SPE/g;
####
#!perl
use strict;
use warnings;
# regex from http://www.cs.sfu.ca/~cameron/REX.html#AppA
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
# University, November, 1998.
# Copyright (c) 1998, Robert D. Cameron.
# The following code may be freely used and distributed provided that
# this copyright and citation notice remains intact and that modifications
# or additions are clearly identified.
#
# MODIFICATION (not part of the cited code): each pattern variable below is
# declared with `our`, and the explanatory comments were added.  The pattern
# strings themselves are unchanged.  `our` keeps them package variables of
# the current package (so $::XML_SPE still names the final pattern when this
# runs in package main) while letting the section compile under `use strict`.
#
# Naming, as used below: each variable holds a regex fragment as a plain
# string; later fragments are built by interpolating earlier ones.  Many
# fragments end in an optional closer (`>?`), so markup that is cut short
# still matches as far as it goes instead of failing.

# A run of one or more characters other than "<" (character data).
our $TextSE = "[^<]+";
# Any non-hyphens followed by one hyphen.
our $UntilHyphen = "[^-]*-";
# Everything up to and including the first "--".
our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
# Remainder of a comment after "<!--": through "--", then an optional ">".
our $CommentCE = "$Until2Hyphens>?";
# Everything up to and including a run of two or more "]".
our $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
# Remainder of a CDATA section after "<![CDATA[": through "]]>".
our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
# A run of whitespace (space, newline, tab, carriage return).
our $S = "[ \\n\\t\\r]+";
# First character of a name: ASCII letter, "_", ":", or any non-ASCII char.
our $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
# Subsequent name characters: additionally digits, "." and "-".
our $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
our $Name = "(?:$NameStrt)(?:$NameChar)*";
# A double-quoted or single-quoted string.
our $QuoteSE = "\"[^\"]*\"|'[^']*'";
# After the DOCTYPE keyword: whitespace, a name, then any number of
# whitespace-separated names or quoted strings.
our $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
# Remainder of a markup declaration: quoted strings and characters other
# than ] " ' > <, closed by ">".
our $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
# Exactly one whitespace character.
our $S1 = "[\\n\\r\\t ]";
# Everything up to and including a run of one or more "?".
our $UntilQMs = "[^?]*\\?+";
# Tail of a processing instruction after its name: either an immediate "?>",
# or one whitespace character and content running through "?>".
our $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
# One item inside the "[ ... ]" part of a DOCTYPE: a comment, another "<!"
# declaration, a processing instruction, a "%Name;" reference, or whitespace.
our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
# Remainder of a DOCTYPE declaration after the keyword, with an optional
# bracketed list of items and an optional closing ">".
our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
# What may follow "<!": a comment, a CDATA section, or a DOCTYPE.
our $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
# What may follow "<?": a name and an optional processing-instruction tail.
our $PI_CE = "$Name(?:$PI_Tail)?";
# What may follow "</": a name, optional whitespace, optional ">".
our $EndTagCE = "$Name(?:$S)?>?";
# A quoted attribute value that contains no "<".
our $AttValSE = "\"[^<\"]*\"|'[^<']*'";
# What may follow "<" in a start or empty-element tag: a name, any number of
# name=value attributes, optional whitespace, optional "/", optional ">".
our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
# Any markup item: "<" followed by one of the continuations above (or by
# nothing at all, since every continuation is optional).
our $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
# The complete shallow-parsing pattern: a text run or a markup item.  Applied
# with /g it splits a document into consecutive text and markup tokens.
our $XML_SPE = "$TextSE|$MarkupSPE";
use strict;

# Driver.  Reads the XML document stored after __DATA__, splits it into
# shallow tokens (text runs and markup items) with the pattern held in
# $::XML_SPE, prints every token on its own line, and finally prints the text
# found inside each <div class="data" id="..."> as "id=text" pairs separated
# by ", ".
#
# Repairs relative to the damaged listing:
#   * the <DATA> read and the /^<div/ test (with its "$nest++ if ($nest > 0)"
#     statement) had been eaten by an HTML-tag stripper and are restored;
#   * a </div> seen outside any data div no longer drives the depth counter
#     negative (see extract_data_divs).
my $xml = join '', <DATA>;
my @elements = $xml =~ /$::XML_SPE/g; # see http://www.cs.sfu.ca/~cameron/REX.html#AppA

# Flatten embedded newlines (in place) so each token prints on a single line.
tr/\n/ / for (@elements);
print " $_\n" for (@elements);
print "\n";

my $out = extract_data_divs(@elements);
print "$out\n";

# extract_data_divs(@tokens)
#
# Takes the shallow-parse tokens of a document, in order, and returns one
# string of the form "id1=text1, id2=text2, ...".  A pair is started by every
# <div> start tag that carries both class="data" and id="word" (either quote
# style); all text tokens met while inside such a div, including text of
# divs nested within it, are appended to the report.  Returns '' when no
# data div is present.
sub extract_data_divs {
    my @tokens = @_;
    my $depth  = 0;     # <div> nesting depth inside a data div; 0 = outside
    my $report = '';

    for my $token (@tokens) {
        if ($token =~ /^<div/) {
            # Only count nesting while already inside a data div; divs that
            # open outside one are irrelevant.
            $depth++ if $depth > 0;
            next unless $token =~ /class\h*=\h*['"]data['"]/; # \h is horizontal white space
            next unless $token =~ /id\h*=\h*['"](\w+)['"]/;
            $report .= ", $1=";
            # Outermost data div: this is where tracking starts.
            $depth = 1 if $depth == 0;
            next;
        }
        if ($token =~ /^<\/div/) {
            # Opening divs are not counted at depth 0, so closing ones must
            # not be either.  Decrementing unconditionally pushed the depth
            # below zero, after which the "== 0" test above never fired and
            # the text of every later data div was silently dropped.
            $depth-- if $depth > 0;
            next;
        }
        next if $token =~ /^[<]/;               # skip other mark-up
        $report .= $token if $depth > 0;
    }

    $report =~ s/^, //;                          # drop the leading separator
    return $report;
}
__DATA__
]>
Hello, World
MondayTuesday
Wednesday
Thursday
Friday
Satur
day
Sunday
bbb
ddd
eee
ggg
Bye
]]>
####
]>
Hello, World
Monday
Tuesday
Wednes
day
Thursday
Friday
Satur
day
Sunday
bbb
ddd
eee
ggg
Bye
]]>