学习本教程前,先得了解 HTTP 协议.
心血来潮,来搞点事情.
一个简单的爬虫
主要步骤:
1. CloseableHttpClient httpclient = HttpClients.createDefault();获取一个httpclient,即客户端,相当于浏览器
2. HttpGet httpget = new HttpGet("http://bkjw.sxu.edu.cn/");构建一个请求,主要有HttpGet和HttpPost两种
3. ResponseHandler<String> responseHandler = new ResponseHandler<String>() { ... };构建响应处理器,把响应转换成字符串
4. String responseBody = httpclient.execute(httpget, responseHandler);使用客户端执行请求信息
5. httpclient.close();关闭客户端
import java.io.IOException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
/**
 * Minimal HTTP GET example: fetches one page and prints its body to standard output.
 *
 * <p>Demonstrates the use of a {@link ResponseHandler} to convert the HTTP response into a
 * {@code String} in a single place, instead of working with the response object at the
 * call site.
 */
public class ClientWithResponseHandler {

    /** Address of the page that is fetched. */
    private static final String TARGET_URL = "http://bkjw.sxu.edu.cn/";

    /**
     * Charset used to decode the body when the response itself does not declare one.
     * Without an explicit fallback, {@code EntityUtils.toString(entity)} decodes such
     * responses as ISO-8859-1, which garbles non-Latin text.
     */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /**
     * Executes a GET request against {@link #TARGET_URL} and prints the request line,
     * a separator and the response body.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the request fails, or if the server answers with a status
     *     outside the 2xx range (reported as a {@link ClientProtocolException})
     */
    public static final void main(String[] args) throws Exception {
        CloseableHttpClient httpclient = HttpClients.createDefault();
        try {
            HttpGet httpget = new HttpGet(TARGET_URL);
            System.out.println("Executing request " + httpget.getRequestLine());

            // Custom response handler: 2xx -> body as text, anything else -> exception.
            ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
                @Override
                public String handleResponse(final HttpResponse response)
                        throws ClientProtocolException, IOException {
                    int status = response.getStatusLine().getStatusCode();
                    if (status < 200 || status >= 300) {
                        throw new ClientProtocolException("Unexpected response status: " + status);
                    }
                    HttpEntity entity = response.getEntity();
                    if (entity == null) {
                        // A successful response may legitimately carry no body.
                        return null;
                    }
                    // A charset declared by the server still takes precedence; the
                    // fallback only applies when the Content-Type header has none.
                    return EntityUtils.toString(entity, FALLBACK_CHARSET);
                }
            };

            String responseBody = httpclient.execute(httpget, responseHandler);
            System.out.println("----------------------------------------");
            System.out.println(responseBody);
        } finally {
            // Always close the client, even when the request or the handler throws.
            httpclient.close();
        }
    }
}