#!/usr/bin/perl -w

  use strict;

  # Input file: first command-line argument, defaulting to "temp.html".
  # A length test (rather than plain truthiness) lets a file that is
  # literally named "0" be selected.
  my $filename =
    ( defined $ARGV[0] && length $ARGV[0] ) ? $ARGV[0] : "temp.html";

  # Slurp the whole file into one scalar (instead of an array of lines).
  #  - Three-argument open with a lexical handle: the name is always
  #    treated as a plain file to read, never as a mode or a pipe.
  #  - "local $/" undefines the line separator only inside the do-block,
  #    so the rest of the program keeps the normal separator.
  #  - The handle is closed explicitly once the content has been read.
  my $open = do {
    open( my $fh, '<', $filename )
      or die "Couldn´t open $filename : $!\n";
    local $/;
    my $content = <$fh>;
    close $fh;
    defined $content ? $content : '';
  };

  # I'll take a simplistic approach that assumes that
  # the only place where a ">" occurs is at the end of
  # a tag. This does fail when you have, for example:
  # <IMG src="less.png" alt="a > b">
  # which is valid HTML as far as I know.
  # I also ignore script and comment handling.

  # Split a string of markup into ready-to-print output lines.
  #
  # Takes the markup as a single scalar and returns a list of strings,
  # each either "Text : <chunk>\n" or "HTML: <tag>\n", in document order.
  # An undefined or empty argument yields an empty list.
  sub tokenize_markup {
    my ($markup) = @_;
    my @lines;
    return @lines unless defined $markup;

    # Loop on length, not truthiness, so a remainder of "0" is still
    # processed instead of silently ending the loop.
    while ( length $markup ) {
      # Match text followed by a tag into $1 and (if a tag follows) $2.
      # "<[^>]*>" (rather than "+") also accepts the empty tag "<>".
      $markup =~ s/\A([^<]+)?(<[^>]*>)?//;
      my ( $text, $tag ) = ( $1, $2 );

      if ( !defined $text && !defined $tag ) {
        # Nothing was removed: the string starts with a "<" that is
        # never closed. Emit the remainder as text and stop; otherwise
        # the loop would spin forever on the same input.
        push @lines, "Text : $markup\n";
        last;
      }

      # Test definedness so that a chunk consisting of "0" is kept.
      push @lines, "Text : $text\n" if defined $text;
      push @lines, "HTML: $tag\n"   if defined $tag;
    }

    return @lines;
  }

  print tokenize_markup($open);

  # The real meat of the code is the "s///;" line.
  # It works as follows:
  # The two parenthesized parts capture stuff:
  # the first parentheses capture non-tagged text,
  # the second parentheses capture text that is
  # within "<" and ">".
  # One or both of the parentheses are allowed to be empty.
  # Everything that is found is deleted from the start of
  # the string.
  # Repeat as long as there is stuff left in the slurped string.