# Slurp the saved post HTML into $data so the regex that follows can match
# across line boundaries.
open( FILE, '<', "C:/Users/li/data_collection/posts/165644996453.html" )
    || die "couldn't open\n";
# Read from FILE one line at a time. The condition must be the readline
# operator: an empty while() condition is always true in Perl, so the loop
# would spin forever without ever reading the file.
while ( <FILE> ) {
    $data .= $_;
}
if ( $data =~ m/(?<=

)(.*)(?=<\/p>\s+