in reply to Re: HTML content extractor
in thread HTML content extractor

Or, even simpler, without the accumulating @array:

# Print only the text content of $file as it is parsed (no accumulator needed).
my $parser = HTML::Parser->new(text_h => [sub { print @_ }, "text"]);
# The contents of <script> and <style> are reported as text events too;
# skip those elements so CSS/JavaScript does not end up in the output.
$parser->ignore_elements(qw(script style));
# parse_file returns undef (with $! set) when the file cannot be opened.
$parser->parse_file($file) or die "Can't parse '$file': $!";

Replies are listed 'Best First'.
Re^3: HTML content extractor
by Anonymous Monk on Oct 21, 2004 at 14:36 UTC
    A:link {color:#333333;text-decoration:none} A:visited {color:#333333;text-decoration:none} A:active {color:#333333;text-decoration:none} A:hover {text-decoration:underline; color:#0099ff;} .mp_bonmun { font-family: "µ¸¿ò"; font-size: 9pt; font-style: normal; line-height: 17pt; font-weight: normal; font-variant: normal; color: #333333; text-align: justify; text-indent: 10pt; } .mp_pop_title { font-family: "µ¸¿ò"; font-size: 10pt; font-weight: bold; color: #00067D; } .mp_4C { font-family: "±¼¸²"; font-size: 9pt; color: #4C4C4C; } .mp_point { font-family: "µ¸¿ò"; font-size: 9pt; font-style: normal; line-height: 17pt; font-weight: bold; font-variant: normal; color: #3399CC; } .mp_title1 { font-size: 9pt; font-style: normal; line-height: 17pt; font-weight: bold; font-variant: normal; color: #3495C2; font-family: "µ¸¿ò"; } .mp_title2 { font-family: "µ¸¿ò"; font-size: 10pt; font-style: normal; line-height: 17pt; font-weight: bold; font-variant: normal; color: #4E53A7; } .mp_title3 { font-size: 9pt; font-style: normal; line-height: 17pt; font-weight: bold; font-variant: normal; color: #F6A026; font-family: "µ¸¿ò"; } .mp_title4 { font-size: 9pt; font-style: normal; line-height: 17pt; font-weight: bold; font-variant: normal; color: #71C601; font-family: "µ¸¿ò"; } table { font-family: "µ¸¿ò"; font-size: 9pt; line-height: 17pt; color: #333333; text-align: justify; } .maintb table{ word-break:break-all; table-layout:fixed; white-space: nowrap; } .maintb td{ font-family: "µ¸¿ò"; font-size: 9pt; line-height: 17pt; color: #333333; text-align: justify; word-break:break-all; table-layout:fixed; } .input01 { background-color:white;border:1 groove #CCCCCC ; font-family:µ¸¿ò; font-size:9pt;font-color:#555555} .input02 { background-color:#f8f8f8;border:0 solid #D6D6D6 ; font-family:µ¸¿ò; font-size:9pt;font-color:#555555} #wow_box { width: 517; height: auto; overflow: auto; border:0 solid; background-color:#FFFFFF; scrollbar-3dlight-color:#CCCCCC; scrollbar-base-color: #FFFFFF; 
scrollbar-shadow-color:#CCCCCC; scrollbar-arrow-color: #888888; scrollbar-face-color: #FFFFFF; text-align: center; vertical-align: middle; } #agree_box { width: 509; height: 350; overflow: auto; padding:7px; border:1px solid #CCCCCC; background-color:#FFFFFF; font-size: 12px; line-height: 20px; scrollbar-3dlight-color:#CCCCCC; scrollbar-base-color: #FFFFFF; scrollbar-shadow-color:#CCCCCC; scrollbar-arrow-color: #888888; scrollbar-face-color: #FFFFFF; text-align: left; } #maga_box { width: 400; height: 120; overflow: auto; padding:7px; border:0 solid #CCCCCC; background-color:#FFFFFF; font-size: 12px; line-height: 20px; scrollbar-3dlight-color:#CCCCCC; scrollbar-base-color: #FFFFFF; scrollbar-shadow-color:#CCCCCC; scrollbar-arrow-color: #888888; scrollbar-face-color: #FFFFFF; text-align: left; } #pp_box { width: 312; height: 80; overflow: auto; padding:5px; background-color:#FFFFFF; font-size: 12px; line-height: 20px; scrollbar-3dlight-color:#CCCCCC; scrollbar-base-color: #FFFFFF; scrollbar-shadow-color:#CCCCCC; scrollbar-arrow-color: #888888; scrollbar-face-color: #FFFFFF; border-top: 0 dashed #CCCCCC; border-right: 0 dashed #CCCCCC; border-bottom: 0 dashed #CCCCCC; border-left: 0 dashed #CCCCCC; text-align: left; } .toc { font-family: "µ¸¿ò"; font-size: 12px; color: #333333; line-height: 20px; white-space: nowrap; } .toc td{ vertical-align: top; border-bottom-width: 0px; border-top-style: none; border-right-style: none; border-bottom-style: dashed; border-left-style: none; } .bar td{ font-family: "µ¸¿ò"; font-size: 12px; line-height: 14px; color: #FFFFFF; padding-top: 2px; } .page { font-family: "µ¸¿ò"; font-size: 11px; color: #3399CC; line-height: 20px; white-space: nowrap; } .pageform { font-family: "µ¸¿ò"; font-size: 11px; color: #3399CC; line-height: 14px; white-space: nowrap; border: 1px solid #CCCCCC; overflow: hidden; height: 14px; width: 30px; margin-top: 3px; margin-bottom: 3px; } .cateform { font-family: "µ¸¿ò"; font-size: 11px; color: #000000; 
line-height: 14px; white-space: nowrap; height: 14px; width: 130px; overflow: hidden; border-top: 1px solid #CCCCCC; border-right: 1px none #CCCCCC; border-bottom: 1px solid #CCCCCC; border-left: 1px none #CCCCCC; margin-top: 3px; margin-bottom: 3px; } .titleform { font-family: "µ¸¿ò"; font-size: 11px; color: #000000; line-height: 20px; white-space: nowrap; height: 14px; width: 240px; overflow: hidden; border: 1px solid #CCCCCC; margin-top: 3px; margin-bottom: 3px; } .staff { font-family: "µ¸¿ò"; font-size: 12px; color: #6699CC; text-decoration: none; } .staff a:link{ color:#AAAAAA; text-decoration:none; font-size: 11px; font-family: "Verdana", "Arial", "Helvetica", "sans-serif"; } .staff a:visited{color:#AAAAAA;text-decoration:none;font-size: 11px;font-family: "Verdana", "Arial", "Helvetica", "sans-serif";} .staff a:active{color:#AAAAAA;text-decoration:none;font-size: 11px;font-family: "Verdana", "Arial", "Helvetica", "sans-serif";} .staff a:hover{color:#3399CC;text-decoration:none;font-size: 11px;font-family: "Verdana", "Arial", "Helvetica", "sans-serif";} b { font-weight: bold; color: #3399CC; } .scb td{ font-family: "µ¸¿ò"; font-size: 12px; color: #336699; text-decoration: none; line-height: 24px; } .receipt td{ font-family: "µ¸¿ò"; font-size: 12px; color: #000000; text-decoration: none; line-height: 24px; } .login td{ font-family: "µ¸¿ò"; font-size: 12px; color: #336699; text-decoration: none; line-height: 16px; } .version { color:#FFFFFF; font-size: 10px; font-family: "Helvetica", "sans-serif", "Arial",; margin-bottom: -2px; margin-right: -20px; } .barlink a:link { color:#FFFFFF; text-decoration:none; font-size: 12px; font-family: "µ¸¿ò"; } .barlink a:visited { color:#FFFFFF; text-decoration:none; font-size: 12px; font-family: "µ¸¿ò"; } .barlink a:hover { color:#FFFFFF; text-decoration:none; font-size: 12px; font-family: "µ¸¿ò"; } .barlink a:active { color:#FFFFFF; text-decoration:none; font-size: 12px; font-family: "µ¸¿ò"; }