看到很多人问如何过滤、提取网页信息,其实用 HTMLParser 非常方便。最近闲来无事,想做一个通过代理访问网页的软件,第一步是从各个免费代理网站上提取代理列表,这里就用到了 HTMLParser:
package com.pmjava.search;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;

12HTMLParser抓取网页_HTMLParser_12public class Search {
13HTMLParser抓取网页_休闲_14
14HTMLParser抓取网页_HTMLParser_15    /**
15HTMLParser抓取网页_休闲_14     * @param args
16HTMLParser抓取网页_休闲_14     * @author Qing
17HTMLParser抓取网页_休闲_14     * @throws Exception
18HTMLParser抓取网页_职场_20     */

19HTMLParser抓取网页_职场_21    public static void main(String[] args) throws Exception {
20HTMLParser抓取网页_休闲_14        String[] url= new String[4] ;
21HTMLParser抓取网页_休闲_14        url[0] = "[url]http://www.cnproxy.com/proxy1.html[/url]";
22HTMLParser抓取网页_休闲_14        String currentUrl = url[0] ;       
23HTMLParser抓取网页_休闲_14        String[] encoding = new String[4] ;   
24HTMLParser抓取网页_休闲_14        encoding[1] = "gb2312";
25HTMLParser抓取网页_休闲_14        String currentEncoding = encoding[1] ;
26HTMLParser抓取网页_休闲_14        Parser parser = new Parser() ;       
27HTMLParser抓取网页_休闲_14            parser.setURL(currentUrl) ;
28HTMLParser抓取网页_休闲_14            parser.setEncoding(currentEncoding) ;
29HTMLParser抓取网页_休闲_14            NodeClassFilter f=new NodeClassFilter(TableTag.class);
30HTMLParser抓取网页_休闲_14            NodeList nodelist =  parser.extractAllNodesThatMatch(f);
31HTMLParser抓取网页_休闲_14            String list=null;
32HTMLParser抓取网页_休闲_14            String []Temp  ;
33HTMLParser抓取网页_休闲_14            String []Temp1;
34HTMLParser抓取网页_职场_37            if (nodelist.size()>0){
35HTMLParser抓取网页_职场_39                for (int i = 0; i < nodelist.size(); i++)    {
36HTMLParser抓取网页_休闲_14                    TableTag linkTag = (TableTag)nodelist.elementAt(i);
37HTMLParser抓取网页_休闲_14                    list=linkTag.getChildrenHTML().replace("<tr>", "").replace("<td>", "").replace("<td>", "").replace("</td>", "").replace("</tr>", "").replace("<SCRIPT type=text/javascript>document.write(", "").replace(")</SCRIPT>", "").replace("p_w_picpathURl=","").replace("<td width=\"140\">IP:Port<td width=\"40\">Type<td width=\"90\">Speed<td width=\"160\"> Country/Area","").replace("", "");
38HTMLParser抓取网页_职场_20                }

39HTMLParser抓取网页_休闲_14                File file=new File("f://2.txt");
40HTMLParser抓取网页_休闲_14                FileWriter writer=new FileWriter(file,true);
41HTMLParser抓取网页_休闲_14                writer.write(list);
42HTMLParser抓取网页_休闲_14                writer.close();
43HTMLParser抓取网页_休闲_14                String readFile,writerFile = null,t,t1;
44HTMLParser抓取网页_休闲_14                FileReader br=new FileReader(file);
45HTMLParser抓取网页_休闲_14                BufferedReader bufread  =   new  BufferedReader(br);
46HTMLParser抓取网页_休闲_14                String port;               
47HTMLParser抓取网页_HTMLParser_52                String []port2={z,m,k,l,d,x,i,w,q,b};               
48HTMLParser抓取网页_职场_54                while ((readFile = bufread.readLine()) != null) {       
49HTMLParser抓取网页_休闲_14                    if(readFile.length()>1)
50HTMLParser抓取网页_休闲_57                    {
51HTMLParser抓取网页_休闲_14                     Temp=readFile.split("HTTP");
52HTMLParser抓取网页_休闲_14                     int a=Temp[0].trim().indexOf(":");
53HTMLParser抓取网页_休闲_14                    port=Temp[0].trim().substring(a,Temp[0].trim().length()).replace("\"", "").replace("+", "").replace(":","");
54HTMLParser抓取网页_休闲_14                    char []port1=port.toCharArray();
55HTMLParser抓取网页_休闲_14                    String temp1 = null,temp2 = "";
56HTMLParser抓取网页_休闲_14                    for(int j=0;j<port1.length;j++)
57HTMLParser抓取网页_休闲_65                    {                       
58HTMLParser抓取网页_休闲_14                        System.out.println(port1[j]);
59HTMLParser抓取网页_休闲_14                        for(int e=0;e<port2.length;e++)
60HTMLParser抓取网页_休闲_69                        {                           
61HTMLParser抓取网页_休闲_14                            if(String.valueOf(port1[j]).equals(port2[e]))
62HTMLParser抓取网页_HTMLParser_72                            {
63HTMLParser抓取网页_休闲_14                                temp2=temp2+temp1;
64HTMLParser抓取网页_职场_20                            }

65HTMLParser抓取网页_职场_20                        }

66HTMLParser抓取网页_职场_20                    }

67HTMLParser抓取网页_职场_20                    }

68HTMLParser抓取网页_职场_20                 }

69HTMLParser抓取网页_休闲_14                bufread.close();
70HTMLParser抓取网页_休闲_14                br.close();   
71HTMLParser抓取网页_职场_20            }

72HTMLParser抓取网页_休闲_14                   
73HTMLParser抓取网页_休闲_14       
74HTMLParser抓取网页_职场_20    }

75HTMLParser抓取网页_休闲_14
76HTMLParser抓取网页_休闲_14
77HTMLParser抓取网页_休闲_14
78HTMLParser抓取网页_休闲_89}

79HTMLParser抓取网页_职场