项目源代码 https://gitee.com/fakerlove/jsoup


文章目录

  • 2. http-client 讲解
  • 2.1 get 请求
  • 2.2 get 带参数
  • 工具类
  • 发送请求
  • 2.3 Post 请求
  • 2.4 Post 带参数
  • 2.5 连接池
  • 2.6 参数


2. http-client 讲解

2.1 get 请求

请求的网址

https://www.baidu.com

java 代码

package com.ak;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;


//@Slf4j
/**
 * Minimal GET example: requests the Baidu home page and prints the length of the
 * response body when the server answers with status 200.
 */
public class demo {

    /**
     * Sends a single GET request to {@code https://www.baidu.com}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // The request to send (the equivalent of typing the address into a browser).
        HttpGet httpGet = new HttpGet("https://www.baidu.com");

        // try-with-resources closes the response first and then the client, even when
        // execute() throws, so neither of them leaks.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                // "UTF-8" is only the fallback used when the response declares no charset.
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println(content.length());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

2.2 get 带参数

请求的网址

https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=电影

工具类

package com.ak.utils;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;

/**
 * Shared HTTP helpers: a pooled connection manager, a list of User-Agent strings
 * to choose from, and a common request configuration.
 */
public class HttpUtils {

    /** Connection pool, configured once in the static initializer. */
    public static PoolingHttpClientConnectionManager cm;
    /** Candidate values for the User-Agent request header. */
    public static ArrayList<String> agents;

    static  {
        // Connection pool: 100 connections overall, at most 10 per route (target host).
        PoolingHttpClientConnectionManager pool = new PoolingHttpClientConnectionManager();
        pool.setMaxTotal(100);
        pool.setDefaultMaxPerRoute(10);
        cm = pool;

        // User-Agent strings, copied into the public list in the order given here.
        String[] userAgents = {
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
            "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
            // NOTE(review): identical to the previous entry, so it is twice as likely to
            // be picked by a uniform random choice - confirm whether that is intended.
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36"
        };
        agents = new ArrayList<String>();
        for (String userAgent : userAgents) {
            agents.add(userAgent);
        }
        System.out.println("<--------- HttpUtils initialization success --------->");
    }

    /**
     * Builds the request configuration used by the examples.
     *
     * @return a new {@link RequestConfig} with a connect timeout of 1000, a
     *         connection-request timeout of 1000 and a socket timeout of 10000
     */
    public static RequestConfig getConfig() {
        return RequestConfig.custom()
                .setConnectTimeout(1000)            // establishing the connection
                .setConnectionRequestTimeout(1000)  // obtaining a connection from the pool
                .setSocketTimeout(10 * 1000)        // waiting for data
                .build();
    }
}

发送请求

package com.ak;

import com.ak.utils.HttpUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;


/**
 * GET request with query parameters.
 *
 * <p>Requests the Douban movie tag page and prints the response body. Douban applies
 * anti-crawler measures, so the request goes through the helpers in {@link HttpUtils}:
 * the pooled connection manager, the shared timeouts and a randomly chosen User-Agent.
 */
public class demo2 {

    /**
     * Builds {@code https://movie.douban.com/tag/} with the {@code sort}, {@code range}
     * and {@code tags} query parameters, sends it and prints the body on status 200.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Client backed by the connection pool held in HttpUtils.cm. It is not closed here.
        // NOTE(review): closing it would presumably shut down the shared pool as well -
        // confirm against the HttpClientBuilder documentation.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();

        // URL being reproduced:
        // https://movie.douban.com/tag/#/?sort=U&range=0,10&tags=%E5%8A%A8%E4%BD%9C,%E7%94%B5%E5%BD%B1
        try {
            // URIBuilder takes care of encoding the parameter values.
            URIBuilder uriBuilder = new URIBuilder("https://movie.douban.com/tag/");
            uriBuilder.setParameter("sort", "U");
            uriBuilder.setParameter("range", "0,10");
            uriBuilder.setParameter("tags", "电影");

            HttpGet httpGet = new HttpGet(uriBuilder.build());
            httpGet.setConfig(HttpUtils.getConfig());
            // Pick one of the prepared User-Agent strings at random.
            int agentNum = new Random().nextInt(HttpUtils.agents.size());
            httpGet.addHeader("User-Agent", HttpUtils.agents.get(agentNum));

            // try-with-resources closes the response even when reading the body fails.
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                if (response.getStatusLine().getStatusCode() == 200) {
                    // "UTF-8" is only the fallback used when the response declares no charset.
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        } catch (IOException | URISyntaxException e) {
            // Only the failures this code can actually raise are handled here; programming
            // errors are no longer hidden behind a blanket catch (Exception).
            e.printStackTrace();
        }
    }
}

爬取

https://www.baidu.com/s?wd=faker

相当于在百度中搜索关键词 faker

package com.ak;

import com.ak.utils.HttpUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;


/**
 * GET request with a query parameter.
 *
 * <p>Searches Baidu for the keyword {@code faker}
 * ({@code https://www.baidu.com/s?wd=faker}) and prints the response body. The request
 * uses the pooled connection manager, the shared timeouts and a randomly chosen
 * User-Agent from {@link HttpUtils}.
 */
public class demo2 {

    /**
     * Builds the search URL, sends it and prints the body on status 200.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Client backed by the connection pool held in HttpUtils.cm. It is not closed here.
        // NOTE(review): closing it would presumably shut down the shared pool as well -
        // confirm against the HttpClientBuilder documentation.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();

        try {
            // URIBuilder appends "?wd=faker" and encodes the value.
            URIBuilder uriBuilder = new URIBuilder("https://www.baidu.com/s");
            uriBuilder.setParameter("wd", "faker");

            HttpGet httpGet = new HttpGet(uriBuilder.build());
            httpGet.setConfig(HttpUtils.getConfig());
            // Pick one of the prepared User-Agent strings at random.
            int agentNum = new Random().nextInt(HttpUtils.agents.size());
            httpGet.addHeader("User-Agent", HttpUtils.agents.get(agentNum));

            // try-with-resources closes the response even when reading the body fails.
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                if (response.getStatusLine().getStatusCode() == 200) {
                    // "UTF-8" is only the fallback used when the response declares no charset.
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        } catch (IOException | URISyntaxException e) {
            // Only the failures this code can actually raise are handled here; programming
            // errors are no longer hidden behind a blanket catch (Exception).
            e.printStackTrace();
        }
    }
}

2.3 Post 请求

请求网址

https://www.baidu.com

java代码

与 GET 请求相比,唯一的区别是把 HttpGet 换成了 HttpPost

package com.ak;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;


//@Slf4j
/**
 * Minimal POST example: sends a POST request without a body to the Baidu home page
 * and prints the response body when the server answers with status 200.
 */
public class demo3 {

    /**
     * Sends a single POST request to {@code https://www.baidu.com}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Same flow as the plain GET example; only the request type differs.
        HttpPost httppost = new HttpPost("https://www.baidu.com");

        // try-with-resources closes the response first and then the client, even when
        // execute() throws, so neither of them leaks.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httppost)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                // "UTF-8" is only the fallback used when the response declares no charset.
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println(content);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

2.4 Post 带参数

爬取网址

http://yun.itheima.com/search

java 代码

package com.ak;

import com.ak.utils.HttpUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;


//@Slf4j
/**
 * POST request with form parameters: submits {@code keys=java} to
 * {@code http://yun.itheima.com/search} and prints the response.
 */
public class demo4 {

    /**
     * Builds the URL-encoded form, posts it and prints the status line object and,
     * on status 200, the response body.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Client backed by the connection pool held in HttpUtils.cm. It is not closed here.
        // NOTE(review): closing it would presumably shut down the shared pool as well -
        // confirm against the HttpClientBuilder documentation.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(HttpUtils.cm).build();

        try {
            // Form fields sent in the request body.
            List<NameValuePair> params = new ArrayList<>();
            params.add(new BasicNameValuePair("keys", "java"));
            UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(params, "utf-8");

            HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");
            // Timeouts shared by all examples.
            httpPost.setConfig(HttpUtils.getConfig());
            // Attach the form as the request body.
            httpPost.setEntity(formEntity);
            // Random User-Agent header (this sets a header, not a proxy).
            int agentNum = new Random().nextInt(HttpUtils.agents.size());
            httpPost.addHeader("User-Agent", HttpUtils.agents.get(agentNum));

            // try-with-resources closes the response even when reading the body fails.
            try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
                System.out.println(response);
                if (response.getStatusLine().getStatusCode() == 200) {
                    // "UTF-8" is only the fallback used when the response declares no charset.
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        } catch (IOException e) {
            // Covers both the unsupported-encoding failure of the form entity and
            // transport errors; programming errors are no longer swallowed.
            e.printStackTrace();
        }
    }
}

2.5 连接池

package com.ak.utils;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;

/**
 * Shared HTTP helpers: a pooled connection manager, a list of User-Agent strings,
 * a common request configuration and a convenience method that downloads a page.
 */
public class HttpUtils {

    /** Connection pool, configured once in the static initializer. */
    public static PoolingHttpClientConnectionManager cm;
    /** Candidate values for the User-Agent request header. */
    public static ArrayList<String> agents;

    /**
     * Single client reused by every {@link #doGetHtml(String)} call instead of building
     * a new, never-closed client per request. It is bound to the pool that {@link #cm}
     * referenced when this class was initialized.
     */
    private static final CloseableHttpClient HTTP_CLIENT;
    /** Shared random source used to pick a User-Agent. */
    private static final Random RANDOM = new Random();

    static  {
        // Connection pool: 100 connections overall, at most 10 per route (target host).
        cm = new PoolingHttpClientConnectionManager();
        cm.setMaxTotal(100);
        cm.setDefaultMaxPerRoute(10);

        // User-Agent strings to choose from.
        agents = new ArrayList<String>();
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36");
        agents.add("Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50");
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0");
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2");
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36");
        agents.add("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11");
        agents.add("Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16");
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER");
        agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");
        // NOTE(review): identical to the previous entry - confirm whether that is intended.
        agents.add("Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0");
        agents.add("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36");

        // Built after the pool is configured so the client borrows its connections from it.
        HTTP_CLIENT = HttpClients.custom().setConnectionManager(cm).build();
        System.out.println("<--------- HttpUtils initialization success --------->");
    }


    /**
     * Downloads the source of a page with a GET request.
     *
     * <p>The request carries a randomly chosen User-Agent from {@link #agents} and the
     * timeouts from {@link #getConfig()}.
     *
     * @param url address of the page; {@code null} or blank is treated as a failure
     * @return the response body when the status code is 200 and a body is present;
     *         an empty string in every other case (the failure is reported on stdout)
     */
    public String doGetHtml(String url) {
        // Fail in the same way as any other unsuccessful download instead of letting
        // HttpGet throw on a missing address.
        if (url == null || url.trim().isEmpty()) {
            System.out.println("<--------- doGetHtml() ERROR --------->");
            return "";
        }

        HttpGet httpGet = new HttpGet(url);
        // Rotate the User-Agent: index is uniform in [0, agents.size()).
        httpGet.addHeader("User-Agent", agents.get(RANDOM.nextInt(agents.size())));
        httpGet.setConfig(getConfig());

        // try-with-resources closes the response; the client itself stays open so
        // later calls can reuse it.
        try (CloseableHttpResponse response = HTTP_CLIENT.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200 && response.getEntity() != null) {
                // "utf8" is only the fallback used when the response declares no charset.
                return EntityUtils.toString(response.getEntity(), "utf8");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("<--------- doGetHtml() ERROR --------->");
        return "";
    }

    /**
     * Builds the request configuration used by the examples.
     *
     * @return a new {@link RequestConfig} with a connect timeout of 1000, a
     *         connection-request timeout of 1000 and a socket timeout of 10000
     */
    public static RequestConfig getConfig() {

        RequestConfig config = RequestConfig.custom()
                // Maximum time to establish the connection
                .setConnectTimeout(1000)
                // Maximum time to obtain a connection from the pool
                .setConnectionRequestTimeout(1000)
                // Maximum time to wait for data
                .setSocketTimeout(10 * 1000)
                .build();
        return config;
    }
}

2.6 参数

/**
 * Builds the request configuration used by the examples.
 *
 * @return a new {@link RequestConfig} carrying the three timeouts set below
 */
public static RequestConfig getConfig() {
        // NOTE(review): values are presumably milliseconds - confirm against the
        // RequestConfig documentation.
        final int connectTimeout = 1000;            // establishing the connection
        final int connectionRequestTimeout = 1000;  // obtaining a connection from the pool
        final int socketTimeout = 10 * 1000;        // waiting for data

        return RequestConfig.custom()
                .setConnectTimeout(connectTimeout)
                .setConnectionRequestTimeout(connectionRequestTimeout)
                .setSocketTimeout(socketTimeout)
                .build();
}