# Scrape the earnings-forecast page of data.10jqka.com.cn and dump its table
# to pipe-delimited text files.
#
# Steps:
#   1. Connect to the MySQL database and switch the session to utf8.
#   2. Download the page and keep a copy in data.html.
#   3. Parse data.html; read the report title and the largest numeric
#      `page` attribute found on <a> elements.
#   4. For every page number 1..$max, append the first eight cells of each
#      table row (header row skipped) to "<title>-<page>.txt" and echo the
#      row to STDOUT.
#
# Dies on: database connection failure, HTTP failure, or any file
# open/close/parse failure.
use strict;
use warnings;
use utf8;

use LWP::UserAgent;
use DBI;
use POSIX;
use Data::Dumper;
use HTML::TreeBuilder;
use HTML::TreeBuilder::XPath;

my $user   = "root";
my $passwd = 'xxx';

# `$DBI::errstr` carries the reason for a failed connect; the previous
# `DBI-errstr` was a bareword subtraction and never produced the message.
my $dbh = DBI->connect(
    "dbi:mysql:database=zjzc_vote;host=14.5.5.57;port=3306",
    $user, $passwd
) or die "can't connect to database " . $DBI::errstr;
$dbh->do("SET NAMES utf8");

my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;
$ua->agent("Mozilla/8.0");

my $response = $ua->get('http://data.10jqka.com.cn/financial/yjyg/');

# Without a successful response there is nothing to parse; stop here rather
# than calling methods on an undefined tree further down.
die "request failed: " . $response->status_line . "\n"
    unless $response->is_success;

my $html = $response->decoded_content;
$html = '' unless defined $html;

# Low-precedence `or` so the check applies to open() itself, and an explicit
# UTF-8 layer because decoded_content returns a character string.
open my $data_fh, '>:encoding(UTF-8)', 'data.html'
    or die "open data file failed:$!";
print {$data_fh} $html;

# The handle must be closed (flushed) before the parser re-reads the file.
close $data_fh or die "close data file failed:$!";

# parse_file reads the file in binary mode, so text taken from the tree is
# the UTF-8 byte sequence written above and is printed back out unchanged.
my $tree = HTML::TreeBuilder::XPath->new;
defined $tree->parse_file("data.html")
    or die "parse data file failed:$!";

my $title = $tree->findvalue('/html/body//span[@class="text-value"]');
print "\$title is $title\n";

# The anchor list holds every link on the page, not only the pager, so only
# anchors carrying a purely numeric `page` attribute are considered.
my $max = 0;
for my $anchor ( $tree->find_by_tag_name('a') ) {
    my $page = $anchor->attr('page');
    next unless defined $page && $page =~ /\A\d+\z/;
    $max = $page if $page > $max;
}
print "\$max is $max\n";

# The row list does not change between iterations, so build it once.
my @rows = $tree->find_by_tag_name("tr");
shift @rows;    # the first <tr> is the header row

# The title comes from the remote page; neutralise path separators and NUL
# so it cannot redirect the output file to another directory.
( my $file_stem = $title ) =~ s{[/\\\0]}{_}g;

# NOTE(review): every page number is written from the same parsed document;
# no per-page request is made here. Confirm whether each page was meant to
# be fetched separately.
if (@rows) {
    for my $m ( 1 .. $max ) {
        my $out_file = "$file_stem-$m.txt";
        open my $out_fh, '>>', $out_file
            or die "open $out_file failed:$!";

        for my $row (@rows) {
            my @children = $row->content_list;
            next unless @children;

            # Always emit eight columns; missing cells become empty strings.
            my @cells = map { cell_text( $children[$_] ) } 0 .. 7;

            print join( '', @cells ), "\n";
            print {$out_fh} join( '|', @cells ), "\n";
        }

        close $out_fh or die "close $out_file failed:$!";
    }
}

$tree->delete;
$dbh->disconnect;

# Return the text of one child of a table row: element objects are
# flattened with as_text, plain text nodes are returned as-is, and a
# missing child yields the empty string.
sub cell_text {
    my ($node) = @_;
    return '' unless defined $node;
    return ref $node ? $node->as_text : $node;
}
perl 爬取同花顺数据
转载:本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
下一篇:vi/vim下看十六进制文件
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章