package ScrapeMyData;
use Moo;
use Mojo::DOM;
use v5.36;

# Raw markup text to scrape; parse() hands it to Mojo::DOM->new.
has input => ( is => 'ro', required => 1 );

=head2 parse

This method returns the extracted data from the C<input> attribute.
It first looks for blah blah to identify the start of the data, then
looks for blah blah blah blah to identify the individual lines.

Unfortunately I couldn't identify a pattern in the DIVs so I'm matching
the class names and this is very likely to break.

TODO: scan the file for the most common class="..." and then guess
that that class is the one used on data rows.

=cut

sub parse($self) {
  my $dom = Mojo::DOM->new( $self->input );
  # The yada-yada statement below is a placeholder for the extraction
  # logic; it dies with "Unimplemented" if it is ever reached.
  ...
  # return an arrayref of data
}

1;
##

##

use Test2::V0;
use v5.36;

use ScrapeMyData;

# Build a scraper around a sample document.  The <<~'END' heredoc strips
# the common leading indentation and performs no interpolation.
my $scraper = ScrapeMyData->new(input => <<~'END');
  
  ...
  
  ...
  
  Sam Namett, MD - Physician - Interventional Orthopedics
  ...Exosomes are nanovesicles (30-200 nm) found in extracellular
  space of various cell types, and in biofluids; having diverse
  functions including intracellular ...
  
  ...
  
  
  END

# Compare the parsed rows against the expected structure.
# NOTE(review): the bare "..." entries below stand in for rows elided from
# this example; replace them with real expected rows (comma-separated)
# before running the test.
is( $scraper->parse,
  [
    ...
    { author => 'Sam Namett',
      title => 'Interventional Orthopedics ...',
    }
    ...
  ],
  'parse'
);

# No plan is declared above, so signal explicitly that every test ran.
done_testing;


##

##

  foreach (@files7){
    print "Newparse2 == Parsing file: $_ \n";
    # Read the whole file via slurp_utf8 and hand its contents to the
    # scraper.  The class is ScrapeMyData (the original spelling,
    # "ScapeMyData", names a package that does not exist).
    my $scraper = ScrapeMyData->new(input => path($_)->slurp_utf8);
    my $data= $scraper->parse;