学习HTML::TokeParser,此模块比HTML::Parser简单很多。

看代码:

#! /usr/local/bin/perl
use strict;
use warnings;

use Data::Dumper;
use Encode;
use HTML::TokeParser;
use LWP::Simple;

# Fetch the Sina front page, keep a local copy with Unix line endings in
# 1.html, then print the trimmed text that follows every <tr> start tag.
my $url  = "http://www.sina.com.cn/";
my $file = '1.html';

# LWP::Simple::get returns undef on failure; stop here instead of writing
# an empty file and silently parsing nothing.
my $content = get($url);
defined $content or die "Failed to fetch $url\n";

# Turn the fetched string into UTF-8 octets through the public Encode API
# rather than the internal Encode::_utf8_off flag manipulation.
$content = Encode::encode_utf8($content);

# Normalise CRLF line endings in Perl so the script no longer depends on an
# external dos2unix binary whose failure was hidden by "2> /dev/null".
$content =~ s/\r\n/\n/g;

# Lexical handle, checked open/print/close; the handle is closed before the
# parser re-reads the file so the complete document is on disk.
open my $out, '>:raw', $file or die "Cannot open $file for writing: $!";
print {$out} $content        or die "Cannot write to $file: $!";
close $out                   or die "Cannot close $file: $!";

# The constructor returns undef when the file cannot be opened.
my $p = HTML::TokeParser->new($file)
    or die "Cannot open $file for parsing: $!";

# For each <tr> start tag, print the whitespace-trimmed text up to the next
# <tr> or </tr> tag, one row per output line.
while ($p->get_tag("tr")) {
    my $text = $p->get_trimmed_text("tr", "/tr");
    print $text . "\n";
}