一、HttpClient概述

1、百科简介

HttpClient 最初是 Apache Jakarta Commons 下的子项目(现已归入 Apache HttpComponents 项目),用来提供高效的、最新的、功能丰富的支持 HTTP 协议的客户端编程工具包,并且它支持 HTTP 协议最新的版本和建议。

 

2、主要功能

(1)实现了所有 HTTP 的方法(GET,POST,PUT,HEAD 等)
(2)支持自动转向
(3)支持 HTTPS 协议
(4)支持代理服务器等

 

我们用它来抓取页面,用于之后对页面进行解析;

 

二、使用演示

0、Maven依赖

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the HttpClient demo classes. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>ZB2020</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <!-- Pin the language level explicitly: the demo code uses the diamond operator,
             which needs Java 7+, and the compiler plugin's default level may be older. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <!-- The sources contain non-ASCII comments; do not rely on the platform encoding. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.12</version>
        </dependency>
    </dependencies>
</project>

 

1、无参Get请求

package com.zb.crawler.httpclient;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Demonstrates a GET request without query parameters.
 */
public class TestGet {
    /**
     * Requests {@code http://www.itheima.com} and prints the response body
     * to standard output when the status code is 200.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // 1. Create the client; try-with-resources closes it even if a later step throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // 2. Configure the GET request with the target URL.
            HttpGet httpGet = new HttpGet("http://www.itheima.com");
            // 3. Execute the request; the response is closed automatically as well.
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                // 4. On status 200, decode the body as a UTF-8 string and print it.
                if (response.getStatusLine().getStatusCode() == 200) {
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        }
    }
}

 

2、带参Get请求

package com.zb.crawler.httpclient;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Demonstrates a GET request that carries query parameters.
 */
public class TestGetP {
    /**
     * Requests {@code http://yun.itheima.com/search} with the query parameter
     * {@code keys=Java} and prints the response body when the status code is 200.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URI cannot be built, the request fails,
     *                   or the body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the client; try-with-resources closes it even if a later step throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // 2.1 Start a URI builder from the base address.
            URIBuilder builder = new URIBuilder("http://yun.itheima.com/search");
            // 2.2 Add the query parameter.
            builder.setParameter("keys", "Java");
            // 2.3 Configure the GET request with the built URI.
            HttpGet httpGet = new HttpGet(builder.build());
            // 3. Execute the request; the response is closed automatically as well.
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                // 4. On status 200, decode the body as a UTF-8 string and print it.
                if (response.getStatusLine().getStatusCode() == 200) {
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        }
    }
}

 

3、无参Post请求

package com.zb.crawler.httpclient;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Demonstrates a POST request without a request body.
 */
public class TestPost {
    /**
     * Posts to {@code http://www.itheima.com} and prints the response body
     * when the status code is 200.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the client; try-with-resources closes it even if a later step throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // 2. Configure the POST request with the target URL.
            HttpPost httpPost = new HttpPost("http://www.itheima.com");
            // 3. Execute the request; the response is closed automatically as well.
            try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
                // 4. On status 200, decode the body as a UTF-8 string and print it.
                if (response.getStatusLine().getStatusCode() == 200) {
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        }
    }
}

 

4、带参Post请求

package com.zb.crawler.httpclient;

import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

import java.util.ArrayList;
import java.util.List;

/**
 * Demonstrates a POST request that submits form parameters.
 */
public class TestPostP {
    /**
     * Posts the form field {@code keys=Java} to {@code http://yun.itheima.com/search}
     * and prints the response body when the status code is 200.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the form cannot be encoded, the request fails,
     *                   or the body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the client; try-with-resources closes it even if a later step throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // 2. Configure the POST request with the target URL.
            HttpPost httpPost = new HttpPost("http://yun.itheima.com/search");
            // Collect the form fields to submit.
            List<NameValuePair> pairs = new ArrayList<>();
            pairs.add(new BasicNameValuePair("keys", "Java"));
            // Build the form entity: first argument is the field list, second the charset name.
            UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(pairs, "utf8");
            // Attach the form entity as the request body.
            httpPost.setEntity(formEntity);
            // 3. Execute the request; the response is closed automatically as well.
            try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
                // 4. On status 200, decode the body as a UTF-8 string and print it.
                if (response.getStatusLine().getStatusCode() == 200) {
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        }
    }
}

 

5、连接池

package com.zb.crawler.httpclient;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;

import java.io.IOException;

/**
 * Demonstrates issuing requests through a pooled connection manager.
 */
public class TestPool {
    /**
     * Creates a connection pool, issues two GET requests through it and
     * shuts the pool down afterwards.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if a request fails or a body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the pooling connection manager; try-with-resources shuts it down
        //    once all requests are done, even if one of them throws.
        try (PoolingHttpClientConnectionManager manager = new PoolingHttpClientConnectionManager()) {
            // 2. Maximum number of connections in the whole pool.
            manager.setMaxTotal(200);
            // 3. Maximum number of concurrent connections per route (host).
            manager.setDefaultMaxPerRoute(20);
            // 4. Issue requests that share the pool.
            doGet(manager);
            doGet(manager);
        }
    }

    /**
     * Issues one GET request to {@code http://www.itheima.com} through the given
     * pool and prints the response body when the status code is 200.
     *
     * @param manager the shared connection manager to obtain connections from
     * @throws Exception if the request fails or the body cannot be read
     */
    private static void doGet(PoolingHttpClientConnectionManager manager) throws Exception {
        // 4.1 Build a client on top of the shared manager. The client is deliberately
        //     not closed here: the pool is owned and shut down by the caller.
        CloseableHttpClient httpClient = HttpClients.custom().setConnectionManager(manager).build();
        // 4.2 Configure the GET request with the target URL.
        HttpGet httpGet = new HttpGet("http://www.itheima.com");
        // 4.3 Execute the request; only the response is closed per call.
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                // 4.4 On status 200, decode the body as a UTF-8 string and print it.
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println(content);
            } else {
                // Drain the unread body so the connection can go back to the pool
                // instead of being discarded when the response is closed.
                EntityUtils.consume(response.getEntity());
            }
        }
    }
}

 

6、请求参数

package com.zb.crawler.httpclient;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Demonstrates per-request configuration (timeouts) via {@code RequestConfig}.
 */
public class TestConfig {
    /**
     * Requests {@code http://www.itheima.com} with explicit timeouts and prints
     * the response body when the status code is 200.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the request fails, times out, or the body cannot be read
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the client; try-with-resources closes it even if a later step throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // 2. Configure the GET request with the target URL.
            HttpGet httpGet = new HttpGet("http://www.itheima.com");
            // 2.1 Build the request configuration (all values in milliseconds).
            RequestConfig requestConfig = RequestConfig.custom()
                    .setConnectTimeout(1000)           // max time to establish the connection
                    .setConnectionRequestTimeout(500)  // max time to obtain a connection
                    .setSocketTimeout(10 * 1000)       // max time for data transfer
                    .build();
            // 2.2 Apply the configuration to this request.
            httpGet.setConfig(requestConfig);
            // 3. Execute the request; the response is closed automatically as well.
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                // 4. On status 200, decode the body as a UTF-8 string and print it.
                if (response.getStatusLine().getStatusCode() == 200) {
                    String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                    System.out.println(content);
                }
            }
        }
    }
}