in reply to How to speed up/multi thread extract from txt files?
Since you haven't provided the requested further information, here's an indication of what is possible. The following adaptation of your posted sub, run against 200MB of simulated data, shows it doing single searches in ~1/2 second; 10 searches in ~4 seconds. Several runs of 100 searches all came in under 36 seconds.
This includes locating and extracting 3 values per your original. It could probably be optimised further, but there's no point with guessed-at test data.
The simulated data I used looks like this:
C:\test>u:head 661250.dat 1_3_prtnm_1441794 1_2_tname_1441794 1_2_mrslt_1 2_3_prtnm_0133611 2_2_tname_0133611 2_2_mrslt_2 3_3_prtnm_1469079 3_2_tname_1469079 3_2_mrslt_3 4_3_prtnm_0852340 ...
Probably not very realistic data, but also possibly more testing than the real stuff. Enjoy!
#! perl -slw
## Switches: -s turns a command-line "-N=<count>" into $main::N,
## -l appends a newline to every print, -w enables warnings.
use strict;
use Data::Dump qw[ pp ];
use Benchmark::Timer;

my $T = Benchmark::Timer->new;

## Number of random test names to look up; override with -N=<count>.
our $N ||= 10;

## Size of the slice examined around each "2_tname_<name>" hit: this many
## bytes before the hit and this many bytes from the hit onwards.
my $WINDOW_BEFORE = 99;
my $WINDOW_AFTER  = 101;

my $outputdir = "./";

## Emit a diagnostic on STDERR; the arguments are joined with single spaces.
sub log_msg {
    warn "@_\n";
    return;
}

## Chomp the caller's variable in place and return the chomped value.
sub trim {
    chomp $_[0];
    return $_[0];
}

## Extract the param values for the requested tests from the stdf file.
##
## Arguments:
##   $stdf             path of the data file to search
##   $lot, $oper, $sum pieces used to build the .log file name
##   %param_flag       hash whose keys are the test names to look up
##
## The whole file is slurped once, then each test name is located with
## index() and only a small window around the hit is handed to the regex.
## One "tname, prtnm, mrslt" line is printed per test found.
##
## Returns the .log file name built from $lot/$oper/$sum, or nothing if the
## stdf is missing or any open/read fails.
sub get_paramValue {
    my ( $stdf, $lot, $oper, $sum, %param_flag ) = @_;

    print "Running with stdf:$stdf.\n";
    log_msg( "get_paramValue", "Running with stdf:$stdf." );

    unless ( -e $stdf ) {
        log_msg( "get_paramValue", "stdf not found: $stdf" );
        return;
    }

    ## create the output file name,
    ## similar to the stdf name but with .log ext
    my $output = $outputdir . $lot . "_" . $oper . "_" . $sum . ".log";

    ## Results are written to the console device rather than to $output.
    ## NOTE(review): 'CON' is the Windows console; on other systems this
    ## opens a plain file named CON -- confirm the intended destination.
    my $sink = 'CON';
    open my $out, '>', $sink or do {
        log_msg( "get_paramValue", "Can't write to output: $sink ($!)" );
        return;
    };
    print {$out} "tname,idx,param_val\n";

    open my $in, '<', $stdf or do {
        log_msg( "get_paramValue", "Can't read from stdf:$stdf ($!)" );
        return;
    };
    binmode $in;    ## the pattern below matches raw CR LF line endings

    ## Slurp the file. sysread may return fewer bytes than asked for,
    ## so keep reading until the expected size has arrived or EOF is hit.
    my $size = -s $in;
    my $data = '';
    my $got  = 0;
    while ( $got < $size ) {
        my $n = sysread $in, $data, $size - $got, $got;
        unless ( defined $n ) {
            log_msg( "get_paramValue", "Failed to slurp $stdf ($!)" );
            return;
        }
        last unless $n;
        $got += $n;
    }
    close $in;

    if ( $got < $size ) {
        log_msg( "get_paramValue",
            "Failed to slurp $stdf (got $got of $size bytes)" );
        return;
    }

    for my $testname ( keys %param_flag ) {
        my $pos = index $data, "2_tname_$testname";
        if ( $pos < 0 ) {
            warn "$testname not found";
            next;
        }

        ## Clamp the window start: a negative substr offset would count
        ## from the END of the data and yield an unrelated slice.
        my $start = $pos - $WINDOW_BEFORE;
        $start = 0 if $start < 0;
        my $window = substr $data, $start, $pos + $WINDOW_AFTER - $start;

        if ( my ( $part, $result ) = $window =~ m{
                3_prtnm_ (.+?)            \x0d \x0a
                .+? 2_tname_ \Q$testname\E \x0d \x0a
                .+? 2_mrslt_ (.+?)        \x0d \x0a
            }x )
        {
            print {$out} "$testname, $part, $result";
        }
    }

    close $out
        or log_msg( "get_paramValue", "Error closing output: $sink ($!)" );

    return $output;
} ## end sub

## Pick $N random 7-digit test names (duplicates collapse in the hash).
my %tests = map { sprintf( "%07d", 1 + int rand 2750000 ), 1 } 1 .. $N;

$T->start( $N );
get_paramValue( "661250.dat", '', '', '', %tests );
$T->stop( $N );
$T->report;

__END__
C:\test>661250-2 -N=1
Running with stdf:661250.dat.
get_paramValue Running with stdf:661250.dat.
tname,idx,param_val
1552247, 1552247, 1262984
1 trial of 1 (791.111ms total)

C:\test>661250-2 -N=1
Running with stdf:661250.dat.
get_paramValue Running with stdf:661250.dat.
tname,idx,param_val
2253510, 2253510, 133163
1 trial of 1 (487.220ms total)

C:\test>661250-2 -N=1
Running with stdf:661250.dat.
get_paramValue Running with stdf:661250.dat.
tname,idx,param_val 1182816, 1182816, 931072 1 trial of 1 (666.104ms total) C:\test>661250-2 -N=10 Running with stdf:661250.dat. get_paramValue Running with stdf:661250.dat. tname,idx,param_val 0837639, 0837639, 451871 1578850, 1578850, 1077007 0215767, 0215767, 1434997 0030549, 0030549, 2068233 2460633, 2460633, 2590939 1851769, 1851769, 2228542 2347001, 2347001, 2690073 2504105, 2504105, 643507 0848717, 0848717, 489624 0699753, 0699753, 9312 1 trial of 10 (4.182s total) C:\test>661250-2 -N=10 Running with stdf:661250.dat. get_paramValue Running with stdf:661250.dat. tname,idx,param_val 0933816, 0933816, 2169700 1231827, 1231827, 2427794 2329964, 2329964, 1360851 1581872, 1581872, 1702403 2463654, 2463654, 2638116 1344452, 1344452, 15892 2367646, 2367646, 1144127 0380593, 0380593, 1458454 1167290, 1167290, 2542862 1310967, 1310967, 714033 1 trial of 10 (4.869s total) C:\test>661250-2 -N=10 Running with stdf:661250.dat. get_paramValue Running with stdf:661250.dat. tname,idx,param_val 0941033, 0941033, 211493 2263749, 2263749, 2393859 0018212, 0018212, 2123695 1610909, 1610909, 1885425 1996956, 1996956, 1679520 0783089, 0783089, 1634826 1020089, 1020089, 1082597 0681038, 0681038, 930891 1107285, 1107285, 168961 1516076, 1516076, 2160980 1 trial of 10 (4.275s total)
|
|---|