#!/usr/bin/perl
use strict;
use warnings;

# Crude HTML-to-text filter.
#
# Reads the files named on the command line (or STDIN when none are given),
# replaces HTML tags and a couple of common entities with a single space,
# and writes the result to STDOUT.
#
# This is a regex-based replacer, not an HTML parser: a stray '<' in text
# (e.g. "a < b") is treated as the start of a tag up to the next '>'.

# strip_html($text)
#
# Returns a copy of $text with:
#   * every "<...>" span replaced by one space (the span may contain
#     newlines when $text holds more than one line),
#   * every &quot; entity and every literal double quote replaced by a space,
#   * every &nbsp; entity replaced by a space.
# Returns the empty string when $text is undefined.
sub strip_html {
    my ($text) = @_;
    return '' unless defined $text;

    # Simple HTML tag replacer; /s lets '.' cross newlines inside a tag.
    $text =~ s/<.+?>/ /gs;

    # Get rid of HTML quotes.
    # NOTE(review): the pattern was a bare '"' in the copy under review,
    # presumably an entity-decoded '&quot;' -- both forms are handled here.
    $text =~ s/&quot;|"/ /gi;

    # Get rid of non-breaking spaces.
    # NOTE(review): the pattern was a bare space in the copy under review
    # (a no-op), presumably an entity-decoded '&nbsp;'.
    $text =~ s/&nbsp;/ /gi;

    return $text;
}

# run()
#
# Streams input line by line through strip_html() and prints each result.
# Because input is processed one line at a time, a tag that is split across
# two input lines is not removed.
sub run {
    while (my $line = <>) {
        print strip_html($line);
    }
    return;
}

# Only start filtering when executed as a script, so the file can also be
# loaded (require/do) without consuming STDIN.
run() unless caller;