Re: Date Array Convolution
by zentara (Cardinal) on Nov 03, 2011 at 20:00 UTC
|
#!/usr/bin/perl -w
use strict;
my @data;
while (<DATA>){
my @dataline = split (/\s+/,$_);
print "@dataline\n";
push(@data,\@dataline)
}
print "\n@data\n\n";
&printarray(\@data);
print "\n";
my @data_sorted_by_four_fields = sort {
$a->[2] <=> $b->[2]
|| $a->[0] <=> $b->[0]
|| $a->[4] <=> $b->[4]
|| $a->[1] cmp $b->[1]
} @data;
&printarray( \@data_sorted_by_four_fields );
print "\n";
###########################################3
# Print the contents of the array
sub printarray {
my $aref=shift;
foreach my $record (@$aref) {
for my $i (0..4) {
print $record->[$i] . " ";
}
print "\n";
}
}
#############################################
__DATA__
1040564312 z 89 Out 4194077715
1040564322 w 90 Out 4194081727
1040564335 x 94 IN 4194085256
1040564335 y 94 Out 4194085196
1040564312 z 89 In 258381720
1040564322 z 90 In 258385268
| [reply] [d/l] |
|
I think I have the multi-dimensional sorting piece figured out (just sorting by start times) with:
@combined = sort {$a->[0] <=> $b->[0]} (@listone,@listtwo);
What I really need help with is the piece that covers those two difficulties I mentioned in the original post. Something like:
@combined = sort {$a->[0] <=> $b->[0]} (@listone,@listtwo);
foreach (@combined) {
#check for overlapping times and make sure the smallest V time is
+ listed during the overlap
#XXXXXXX
#break the timespans apart if it covers the crossing of a new day
#XXXXXXX
}
| [reply] [d/l] [select] |
Re: Date Array Convolution
by BrowserUk (Patriarch) on Nov 04, 2011 at 00:37 UTC
|
#! perl -slw
use strict;
use Data::Dump qw[ pp ];
sub dt2int {
my( $d, $h, $m ) = unpack '(A2)*', $_[0];
return ( ( $d - 1 ) * 24 + $h ) * 60 + $m;
}
sub int2dt {
sprintf "%02d%02d%02d", int($_[0]/1440)+1, int($_[0]/60)%24, $_[0]
+%60;
}
sub adjacentPairs (&@) {
my $code = shift;
map {
$code->( shift(), $_[0] );
} 1 .. @_;
}
my @listone =
(['010000','010010',2],['010200','010210',5],['012359','020001',3]
+);
my @listtwo = (['010005','010015',1],['010207','010211',4]);
my @res = adjacentPairs{
defined( $_[1] ) && $_[0][4] > $_[1][3]
? [ $_[0][0], int2dt( $_[1][3] - 1 ), $_[0][2] ]
: [ @{ $_[0] }[ 0 .. 2 ] ]
} sort {
$a->[ 3 ] <=> $b->[ 3 ]
} map {
my $s = dt2int( $_->[ 0 ] );
my $e = dt2int( $_->[ 1 ] );
my @out;
while( int( $s / 1440 ) != int( $e / 1440 ) ) {
my $newe = ( $s + 1440 ) % 1440;
push @out, [ int2dt( $s ), int2dt( $newe ), $_->[2], $s, $newe
+ ];
$s = $newe +1;
}
( @out, [ int2dt( $s ), $_->[1], $_->[2], $s, $e ] );
} @listone, @listtwo;
pp \@res;
__END__
c:\test>junk33
[
["010000", "010004", 2],
["010005", "010015", 1],
["010200", "010206", 5],
["010207", "010211", 4],
["012359", "012359", 3],
["020000", "020001", 3],
]
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] [d/l] |
|
This is super close to being exactly what I need! I tweaked the input arrays to test the borders of feasibility of inputs and am breaking things now. It is possible for a window to fall entirely within another, and depending on the V value, can change the behavior a little. If the input arrays are, for example:
my @listone = (['010000','010110',6],['010200','010210',5],['012350','
+020012',3]);
my @listtwo = (['010005','010015',1],['010207','010211',4],['012355','
+020003',1]);
Also, I'm still digesting your code and trying to figure out how it works. Still kinda new to perl :)
Thank you for the help!!! | [reply] [d/l] |
|
2350......0000......0010...
3333333333333333333333
11111111
option 1 - smaller range disappears and the larger becomes 2 ranges
3333333333
333333333333
option 2 - they becomes 4 ranges
3333
11111
1111
333333333
option 3 - other?
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] [d/l] |
|
|
|
|
|
Re: Date Array Convolution
by choroba (Cardinal) on Nov 03, 2011 at 21:47 UTC
|
Update: The code is wrong. Do not use it. I am trying to write a different one.
Probably not a clever approach. The border cases might need some tweaking.
#!/usr/bin/perl
use Data::Dumper;
use warnings;
use strict;
sub combine {
my %result;
while (my $triple = shift) {
my ($left, $right, $value) = @$triple;
if (not exists $result{$left}{R}
or $result{$left}{R} > $value) {
$result{$left}{R} = $value;
}
if (not exists $result{$right}{L}
or $result{$right}{L} > $value) {
$result{$right}{L} = $value;
}
}
return \%result;
} # combine
sub dec {
my $time = shift;
my ($day, $hour, $min) = $time =~ /(..)(..)(..)/;
$min--;
if ($min < 0) {
$min = 59;
$hour--;
if ($hour < 0) {
$hour = 23;
$day--;
die if ($day < 0);
}
}
return sprintf '%02d%02d%02d', $day, $hour, $min;
} # dec
sub tangle {
my $combined = combine(@_);
my @keys = sort keys %$combined;
my $value;
my @result;
my $overlap;
for my $i (0 .. $#keys) {
if (defined $value) {
my $new = $combined->{$keys[$i]}{R};
my $old = $combined->{$keys[$i]}{L};
die if defined $old and $old < $value;
if (defined $new) {
$overlap++;
if ($new < $value) {
push @{ $result[-1] }, dec($keys[$i]), $value;
push @result, [$keys[$i]];
$value = $new;
}
} else {
$overlap--;
if (not $overlap) {
push @{ $result[-1] }, $keys[$i], $value;
undef $value;
} else {
my $next = $combined->{$keys[$i+1]}{L};
if ($next > $value) {
push @{ $result[-1] }, dec($keys[$i]), $value;
push @result, [$keys[$i]];
$value = $next;
}
}
}
} else { # not defined $value
$value = $combined->{$keys[$i]}{R};
die unless defined $value;
push @result, [$keys[$i]];
$overlap++;
}
}
return @result;
} # tangle
sub daysplit {
return map {
my ($start, $end, $value) = @$_;
my $from = 0 + substr $start, 0, 2;
my $to = 0 + substr $end, 0, 2;
if ($from < $to) {
my $split;
my $newfrom = sprintf('%02d', $from) . '2359';
$split = [[$start, $newfrom, $value]];
push @$split,
map { [sprintf('%02d', $_) . '0000',
sprintf('%02d', $_) . '2359',
$value] } $from + 1 .. $to - 1;
my $newto = sprintf('%02d', $to) . '0000';
push @$split, [$newto, $end, $value];
@$split;
} else {
$_;
}
} @_;
} # daysplit
my @listone = (['010000','010010',2],['010200','010210',5],['012359','
+020001',3]);
my @listtwo = (['010005','010015',1],['010207','010211',4]);
my @result = daysplit(tangle(@listone, @listtwo));
print Dumper \@result;
| [reply] [d/l] |
|
**Edited**
1) Your code is awesome, but I understand very little of it.
2) It works almost perfectly, except in the case of overlapping windows, the times are a little off and I can't figure out why.
For example with the input:
my @listone = (['010000','010110',6],['010200','010210',5],['012350','
+020012',3]);
my @listtwo = (['010005','010015',1],['010207','010211',4],['012355','
+020003',1]);
Do you mind adding some comments so I can figure out how you did that?
Thanks!
| [reply] [d/l] |
|
OK, here is the new code I wrote on the underground on my way to work :-) I used OO this time. The second case is your second example input, if your expected output is different, can you show it?
Update: I read the discussion you had with BrowserUk and tried to accommodate the code appropriately.
Update2: zero-intervals removed from output.
| [reply] [d/l] |
|
|
|
I think I finally found a way to break the days apart that's a little different than yours:
@combined = sort {$a->[0] <=> $b->[0]} (@listone,@listtwo);
for ($i=0; $i<scalar(@combined); $i++) {
if (substr($combined[$i][0],0,2) != substr($combined[$i][1],0,2))
+ {
splice(@combined,$i+1,0,[substr($combined[$i][1],0,2)."0000"
+,$combined[$i][1],$combined[$i][2]]);
$combined[$i][1] = substr($combined[$i][0],0,2)."2359";
}
}
I don't really understand though what you did for the day overlap piece. Do you mind adding comments or explaining the chunks? I'm still kinda new to perl :/
Thanks for the help! | [reply] [d/l] |
|
Your code works only if the day difference is one. My code should work for two, three and more days.
| [reply] |
|
Re: Date Array Convolution
by BrowserUk (Patriarch) on Nov 04, 2011 at 21:57 UTC
|
A completely different approach that turns out to be far simpler and more robust. (Ie. It actually works for all possibilities:)
In addition, it avoids a bunch of convoluted range comparisons and the need for sorting, whilst producing fully sorted output. A win-win-win for going back to the drawing board.
#! perl -slw
use strict;
use Data::Dump qw[ pp ];
sub dhm2int {
my( $d, $h, $m ) = unpack '(A2)*', $_[0];
return ( ( $d - 1 ) * 24 + $h ) * 60 + $m;
}
sub int2dhm {
sprintf "%02d%02d%02d", int($_[0]/1440)+1, int($_[0]/60)%24, $_[0]
+%60;
}
#my @listone =
(['010000','010010',2],['010200','010210',5],['012359','020001',3]
+);
#my @listtwo =
(['010005','010015',1],['010207','010211',4]);
my @listone =
(['010000','010110',6],['010200','010210',5],['012350','020012',3]
+);
my @listtwo =
(['010005','010015',1],['010207','010211',4],['012355','020003',1]
+);
my @expd = map {
my $in = $_;
my $s = dhm2int( $in->[ 0 ] );
my $e = dhm2int( $in->[ 1 ] );
my @out;
while( int( $s / 1440 ) != int( $e / 1440 ) ) {
my $newe = ( int( $s / 1440 ) +1 ) * 1440 -1;
push @out, [ int2dhm( $s ), int2dhm( $newe ), $in->[2], $s, $n
+ewe ];
$s = $newe +1;
}
( @out, [ int2dhm( $s ), $in->[1], $in->[2], $s, $e ] );
} @listone, @listtwo;
my( @tally, @id );
for my $e ( 0 .. $#expd ) {
my $r = $expd[ $e ];
for my $i ( $r->[ 3 ] .. $r->[ 4 ] ) {
if( !defined( $tally[ $i ] ) or $tally[ $i ] > $r->[ 2 ] ) {
$tally[ $i ] = $r->[ 2 ];
$id[ $i ] = $e;
}
}
}
my @res;
my $i = 0;
while( $i < $#id ) {
++$i until defined $id[ $i ];
my $id = $id[ $i ];
my $start = $i;
++$i while defined( $id[ $i ] ) and $id[ $i ] == $id;
my $end = $i - 1;
push @res, [ int2dhm( $start ), int2dhm( $end ), $tally[ $start ]
+];
}
pp \@res;
__END__
C:\test>935755.pl
[
["010000", "010004", 2],
["010005", "010015", 1],
["010200", "010206", 5],
["010207", "010211", 4],
["012359", "012359", 3],
["020000", "020001", 3],
]
C:\test>935755.pl
[
["010000", "010004", 2],
["010005", "010015", 1],
["010200", "010206", 5],
["010207", "010211", 4],
["012359", "012359", 3],
["020000", "020001", 3],
]
With the rise and rise of 'Social' network sites: 'Computers are making people easier to use everyday'
Examine what is said, not who speaks -- Silence betokens consent -- Love the truth but pardon error.
"Science is about questioning the status quo. Questioning authority".
In the absence of evidence, opinion is indistinguishable from prejudice.
| [reply] [d/l] |
|
Sweet jesus and sweet sassy molassy, you *Sir* are a gentleman AND a scholar.
Next step is for me to a) apply it to the thousands of data points, and b) figure out how it works!
You guys at perlmonks are amazing at figuring code out and helping come up with awesome solutions, but for newbie programmers, it's hard to learn from your examples because they're complex and have no comments! If you don't mind, could you help me understand a little more how your code works, maybe in PM? I hate taking without understanding the how and why behind the scenes. Or maybe I could just ask a few questions?
As an aside, I figured out a solution as well, but it took me about 15 hours, was ~250 lines of code and went through every type conditional overlap case (ie, a is within b, b is within a, a begins overlap with b, b begins overlap with a, a and b start together b ends first, a and b start together a ends first, a and b end together a starts first, a and b end together b starts first .... a lot of 'if thens', and for each of those a case where a is bigger and a case where b is bigger). Your solution is so much more elegant and ... win.
Thank you for the help!!!
-Alan
| [reply] |
|
| [reply] [d/l] [select] |
|
|
|
Re: Date Array Convolution
by alanonymous (Sexton) on Nov 10, 2011 at 06:11 UTC
|
BrowserUK and Choroba,
This is the solution I've finally ended up with that works exactly as it should. I think structurally I borrowed more from Browser, but I wanted to thank you both for the help and code examples.
Here are the two input files I am using as examples:
number1.mrg
DTG/291505ZDEC11//
XXX/XXX/XXX/301200ZDEC11/021200ZJAN12//
/301226/301227/2/2/001.4/
/302350/310005/2f2f2f2f/g4g4g4g4g4g4g4g4g4g4g4g4g4g4g4g/012.4/
/312357/010003/2f2f2f2f/g4g4g4g4g4g4g4g4g4g4g4g4g4g4g4g/012.7/
//
/
number1.dat
DTG/291500ZDEC11//
XXX/XXX/XXX/301200ZDEC11/031200ZJAN12//
/301222/301232/234/4234/011.0/
/301240/301250/asdf/fdsa/011.3/
/302340/302355/9j9js9j9j9jf9sjfd/9j9sfj9df9323/010.0/
/302359/310002/kfjakdjfakdfasdf/salkdjfaklsdjflkasjd/008.1/
/312359/010001/f333333/f3333333333/002.2/
//
/
And then the code for the project:
#
# this assumes the time span on input docs will be 27 days or less
# this assumes that the current year is less than 2100
# this asssumes that no single closure will cover more than 24 hour
+s
# this assumes that the date validity range between two input fi
+les always match
#
use strict;
use warnings;
use Time::Local;
sub fmon {
if ($_[0] =~ /^\d{6}\wJAN\d{2}$/i) {return "00";}
if ($_[0] =~ /^\d{6}\wFEB\d{2}$/i) {return "01";}
if ($_[0] =~ /^\d{6}\wMAR\d{2}$/i) {return "02";}
if ($_[0] =~ /^\d{6}\wAPR\d{2}$/i) {return "03";}
if ($_[0] =~ /^\d{6}\wMAY\d{2}$/i) {return "04";}
if ($_[0] =~ /^\d{6}\wJUN\d{2}$/i) {return "05";}
if ($_[0] =~ /^\d{6}\wJUL\d{2}$/i) {return "06";}
if ($_[0] =~ /^\d{6}\wAUG\d{2}$/i) {return "07";}
if ($_[0] =~ /^\d{6}\wSEP\d{2}$/i) {return "08";}
if ($_[0] =~ /^\d{6}\wOCT\d{2}$/i) {return "09";}
if ($_[0] =~ /^\d{6}\wNOV\d{2}$/i) {return "10";}
if ($_[0] =~ /^\d{6}\wDEC\d{2}$/i) {return "11";}
}
sub m2d {
my @t = localtime($_[0]*60);
return sprintf "%02d%02d%02d", $t[3], $t[2], $t[1];
}
my ($d,$t);
foreach (<*>) {
$d = $_ if (/\.dat$/i);
$t = $_ if (/\.mrg$/i);
}
my (@dl,@tl);
if (open(D, $d)) {@dl = <D>; close(D); print "Found $d\n";} else {prin
+t "Error: missing XXX.\n";}
if (open(T, $t)) {@tl = <T>; close(T); print "Found $t\n";} else {prin
+t "Error: missing YYY.\n";}
my (@big,@n,@m);
for (@dl,@tl) {
chomp($_);
if (/\w{4}\d{2}\/\d{6}\w{4}\d{2}\/\/$/) {@n = split(/\//);}
if (/^\/\d{6}\/\d{6}\//) {
@m = split(/\//);
my ($t1,$t2);
if (substr($m[1],0,2) < substr($n[3],0,2)) {
if (fmon($n[3]) eq 11) {
$t1 = timelocal(0,substr($m[1],4,2),substr($m[1],2,2),
+substr($m[1],0,2),fmon($n[4]),"20".substr($n[4],10,2))/60;
} else {
$t1 = timelocal(0,substr($m[1],4,2),substr($m[1],2,2),
+substr($m[1],0,2),fmon($n[4]),"20".substr($n[3],10,2))/60;
}
} else {
$t1 = timelocal(0,substr($m[1],4,2),substr($m[1],2,2),subs
+tr($m[1],0,2),fmon($n[3]),"20".substr($n[3],10,2))/60;
}
if (substr($m[2],0,2) < substr($n[3],0,2)) {
if (fmon($n[3]) eq 11) {
$t2 = timelocal(0,substr($m[2],4,2),substr($m[2],2,2),
+substr($m[2],0,2),fmon($n[4]),"20".substr($n[4],10,2))/60;
} else {
$t2 = timelocal(0,substr($m[2],4,2),substr($m[2],2,2),
+substr($m[2],0,2),fmon($n[4]),"20".substr($n[3],10,2))/60;
}
} else {
$t2 = timelocal(0,substr($m[2],4,2),substr($m[2],2,2),subs
+tr($m[2],0,2),fmon($n[3]),"20".substr($n[3],10,2))/60;
}
push @big,[$t1,$t2,$m[5]];
}
}
my %values = ();
for my $e (0 .. $#big) {
my $l = $big[$e];
for my $min ($l->[0] .. $l->[1]) {
if ((!exists $values{$min}) or ($values{$min} > $l->[2])) {
$values{$min} = $l->[2];
}
}
}
print "...processing...\n";
my @res1;
my $s = timelocal(0,substr($n[3],4,0),substr($n[3],2,0),substr($n[3],0
+,2),fmon($n[3]),"20".substr($n[3],10,2))/60;
my $e = timelocal(0,substr($n[4],4,0),substr($n[4],2,0),substr($n[4],0
+,2),fmon($n[4]),"20".substr($n[4],10,2))/60;
while ($s < $e) {
++$s until exists $values{$s} or $s == $e;
my ($val,$start,$end);
if ($s != $e) {
$val = $values{$s};
$start = $s;
++$s while exists $values{$s} and $values{$s} == $val;
$end = $s - 1;
push @res1,[m2d($start),m2d($end),$val];
}
}
my @res2;
foreach (@res1) {
if (substr(@$_[0],0,2) == substr(@$_[1],0,2) ) {
push @res2,[@$_[0],@$_[1],@$_[2]];
} else {
push @res2,[@$_[0],substr(@$_[0],0,2)."2359",@$_[2]];
push @res2,[substr(@$_[1],0,2)."0000",@$_[1],@$_[2]];
}
}
open (C,'>CDA.txt');
my @last = ("","","");
print C "To do later: ... a lot of specific formatting work(easy)\n";
foreach (@res2) {
if (substr($last[0],0,2) ne substr(@$_[0],0,2)) { print C "\n"; }
print C @$_[0]," ",@$_[1]," ",@$_[2],"\n";
$last[0] = @$_[0];
}
close(C);
print "Complete.\n";
print "\nPress Enter to exit.\n";
my $end = <STDIN>;
The way I see this being used is thrown in a directory with the mrg and dat files (with LOTS and LOTS of times listed), run, and the resulting CDA file has the correct data. Do you guys see any glaring errors? In testing it, I can't find any errors yet... I think it should work for leap years, all month and year rollovers, etc. I tried to be conniving in my input test!
I will say though, that through this, I've learned a ton (and my regex look better!).
Thanks!!
-Alan | [reply] [d/l] [select] |