Hello, World

##

 Sunday

##

##


 Sunday


##

##

# List-context global match: with /g and no capture groups in the pattern,
# every successive match of $XML_SPE in $xml becomes one element of @elements.
my @elements = $xml =~ /$XML_SPE/g;

##

##

# Same list-context global match, but naming the pattern as $::XML_SPE
# (shorthand for $main::XML_SPE). A package-qualified name is accepted under
# "use strict", whereas an undeclared bare $XML_SPE is not.
my @elements = $xml =~ /$::XML_SPE/g;

##

##

#!perl
# use strict;
use warnings;

# regex from http://www.cs.sfu.ca/~cameron/REX.html#AppA
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser 
# University, November, 1998.
# Copyright (c) 1998, Robert D. Cameron. 
# The following code may be freely used and distributed provided that
# this copyright and citation notice remains intact and that modifications
# or additions are clearly identified.

# The patterns below are plain strings, built bottom-up by interpolation, and
# assigned to undeclared package variables (no "my"/"our"). Every group is
# non-capturing, so a list-context /g match yields whole tokens.
# NOTE(review): the SE / CE / SPE suffixes appear to follow the naming used in
# the cited REX report -- confirm against the paper.

# Character data: one or more characters that are not '<'.
$TextSE = "[^<]+";
# Zero or more non-hyphens followed by one '-'.
$UntilHyphen = "[^-]*-";
# Everything up to and including the first "--".
$Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
# Rest of a comment after "<!--": through "--", then an optional '>'.
$CommentCE = "$Until2Hyphens>?";
# Everything up to and including the first run of two or more ']'.
$UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
# Rest of a CDATA section after "<![CDATA[": through the closing "]]>".
$CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
# One or more whitespace characters (space, LF, TAB, CR).
$S = "[ \\n\\t\\r]+";
# First character of a name: ASCII letter, '_', ':' or any non-ASCII character.
$NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
# Subsequent name character: as above plus digits, '.' and '-'.
$NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
# A complete name.
$Name = "(?:$NameStrt)(?:$NameChar)*";
# A double-quoted or single-quoted string.
$QuoteSE = "\"[^\"]*\"|'[^']*'";
# After "DOCTYPE": whitespace, a name, then any number of whitespace-separated
# names or quoted strings.
$DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
# Rest of a markup declaration: quoted strings and characters other than
# ] " ' > <, terminated by '>'.
$MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
# Exactly one whitespace character.
$S1 = "[\\n\\r\\t ]";
# Everything up to and including the next run of one or more '?'.
$UntilQMs = "[^?]*\\?+";
# Tail of a processing instruction after its target name: either an immediate
# "?>", or one whitespace character followed by content through "?>".
$PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
# One item inside the "[...]" part of a DOCTYPE: a comment, another "<!"
# declaration, a processing instruction, a "%Name;" reference, or whitespace.
$DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
# Rest of a DOCTYPE declaration: identifier part, optional bracketed item
# list, optional closing '>'.
$DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
# What may follow "<!": a comment, a CDATA section or a DOCTYPE declaration.
# Each continuation is optional, so a truncated construct still matches.
$DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
# What may follow "<?": the target name and an optional tail.
$PI_CE = "$Name(?:$PI_Tail)?";
# What may follow "</": a name, optional whitespace, optional '>'.
$EndTagCE = "$Name(?:$S)?>?";
# A quoted attribute value that contains no '<'.
$AttValSE = "\"[^<\"]*\"|'[^<']*'";
# What may follow '<' in a start or empty-element tag: a name, zero or more
# name=value attributes, optional whitespace, optional '/', optional '>'.
$ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
# Any markup item: '<' followed by one of the continuations above. All of them
# are optional, so a lone '<' still matches as an item.
$MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
# One token of the shallow parse: a text run or a markup item.
$XML_SPE = "$TextSE|$MarkupSPE";

use strict;

# Slurp the sample document that follows __DATA__ into one string.
my $xml = join '', <DATA>;

my $nest = 0;   # depth inside the current interesting <div>; 0 means outside
my $out = '';   # accumulated ", id=text" pairs
# Shallow-parse the document into a flat list of text runs and markup items.
my @elements = $xml =~ /$::XML_SPE/g; # see http://www.cs.sfu.ca/~cameron/REX.html#AppA

# Flatten embedded newlines so each token prints (and is collected) on one line.
tr/\n/ / for (@elements);

# Diagnostic dump of every token, one per line, followed by a blank line.
print "   $_\n" for (@elements);
print "\n";

for (@elements)
{
    if (/^<div/)
    {
        $nest++, next if ($nest > 0); # only increment if inside an interesting <div>

        # An interesting <div> carries class="data" and an id attribute.
        next unless (/class\h*=\h*['"]data['"]/); # \h is horizontal white space
        next unless (/id\h*=\h*['"](\w+)['"]/);
        $out .= ", $1=";
        $nest = 1; # this is the outermost interesting <div>

        next;
    }
    if (/^<\/div/)
    {
        # Only unwind while inside an interesting <div>. The closing tag of an
        # uninteresting top-level <div> must not push the depth below zero,
        # otherwise later interesting <div>s would never be entered.
        $nest-- if ($nest > 0);
        next;
    }
    next if (/^[<]/); # skip other mark-up
    $out .= $_ if ($nest > 0); # keep text only while inside an interesting <div>
}
$out =~ s/^, //; # drop the separator that precedes the first pair
print "$out\n";

__DATA__

 ]>


	
	Hello, World
	



Monday
Tuesday
Wednesday
Thursday

Friday



Satur

day

 Sunday
bbb
ddd
eee
ggg

Bye
]]>



##

##

   

    ]>

   

   

   

   
   Hello, World
   

   

   

   

   

   
   Monday
   
   
   Tuesday
   

   
   Wednes
   
   day
   
   

   
   
   Thursday
   
   

   
   Friday
   

   

   
   Satur
   
   day
   

   
    Sunday
   

   
   bbb
   

   
   ddd
   

   
   eee
   

   
   ggg
   

   

   
   Bye]]>