#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

$Data::Dumper::Sortkeys = 1;    # stable key order in the dump below

# Extracts two kinds of information from the filing text stored after
# __DATA__:
#   * every leading "NAME: value" header line seen before the exhibit, and
#   * the <html>...</html> body that follows the first <FILENAME> line whose
#     name looks like an EX-21 exhibit ("...ex21....htm" or "...ex-21....htm").
# Everything is collected in %hash and dumped with Data::Dumper.

my %hash;

# get_html([$fh])
#
# Reads lines from $fh (defaults to the DATA handle) starting at the current
# read position and returns, as a list of chomped lines, the first block that
# runs from a line containing "<html>" through the next line containing
# "</html>" (both inclusive, matched case-insensitively).
#
# Reading stops at the closing tag, so later documents in the same file are
# neither scanned nor appended to the result.  Returns an empty list when no
# "<html>" line is found before end of file.
sub get_html {
    my ($fh) = @_;
    $fh = \*DATA unless defined $fh;

    my @html_lines;
    my $in_html = 0;

    # Same line selection as the flip-flop form /<html>/ .. /<\/html>/
    # (see [id://525392]), but with explicit state and an early exit.
    LINE:
    while ( my $line = <$fh> ) {
        $in_html = 1 if $line =~ m{<html>}i;
        next LINE unless $in_html;

        chomp $line;
        push @html_lines, $line;

        last LINE if $line =~ m{</html>}i;
    }

    return @html_lines;
}

LINE:
while ( my $line = <DATA> ) {

    # Header field: "SOME NAME:   value" -> $hash{'SOME NAME'} = 'value'.
    # Only the first run of word characters/hyphens after the colon is kept.
    if ( my ( $name, $value ) = $line =~ m/^\s*([\w ]+):\s+([\w-]+)/ ) {
        $hash{$name} = $value;
    }

    # EX-21 exhibit file name.  The dot before "htm" is escaped and \S* keeps
    # the match from running into any text that follows the name on the line.
    if ( my ($filename) = $line =~ m/^\s*<FILENAME>\s*(\w+ex-?21\S*\.htm)/i ) {
        $hash{FILENAME} = $filename;

        my @html = get_html( \*DATA );
        $hash{DATA} = \@html if @html;    # no DATA key when nothing was found

        last LINE;    # exhibit captured; no need to read the rest of the file
    }
}

print Dumper \%hash;

=pod

Prints:

    $VAR1 = {
              'CENTRAL INDEX KEY' => '0000786368',
              'CONFORMED PERIOD OF REPORT' => '20081231',
              'DATA' => [
                          '<html>',
                          'blah',
                          'smore blah',
                          'blahblah',
                          ' **** BODY OF TEXT I WISH TO EXTRACT *****',
                          '</html>'
                        ],
              'DATE AS OF CHANGE' => '20090331',
              'FILED AS OF DATE' => '20090331',
              'FILENAME' => 'v144610_ex21.htm',
              'FORM TYPE' => '10-K'
            };

=cut

__DATA__
*********** Sample text ***************
CONFORMED PERIOD OF REPORT: 20081231 ------ individual line I want
FILED AS OF DATE: 20090331 ------ individual line I want
DATE AS OF CHANGE: 20090331 ------ individual line I want
CENTRAL INDEX KEY: 0000786368 ------ individual line I want
FORM TYPE: 10-K ------ individual line I want
Whole buncha text here …………….
</DOCUMENT>
<DOCUMENT>
<TYPE>EX-21
<SEQUENCE>7
<FILENAME>v144610_ex21.htm -----------My starting point
<TEXT>
<html>
blah
smore blah
blahblah
 **** BODY OF TEXT I WISH TO EXTRACT *****
</html>
</TEXT>
</DOCUMENT> ----------- My ending point
**********End of sample text ***********
In reply to Re: Extract individual lines and block of text from large files
by Marshall
in thread Extract individual lines and block of text from large files
by wrkrbeee
| For: | Use: | ||
| & | &amp; | ||
| < | &lt; | ||
| > | &gt; | ||
| [ | &#91; | ||
| ] | &#93; |