一、代码
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Demo entry point: fetches a fixed page over HTTP(S) while presenting a
 * desktop-browser User-Agent header, and echoes the response body to standard output.
 */
public class Launch {
    /**
     * Issues a GET request to {@code https://www.baidu.com} and prints the
     * response body line by line, decoded as UTF-8.
     *
     * @param args ignored
     * @throws Exception if the URL is invalid or the request/read fails
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("https://www.baidu.com");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try {
            conn.setRequestMethod("GET");
            // Present a browser User-Agent instead of the default Java one.
            conn.setRequestProperty("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36");
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading or printing throws.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = br.readLine()) != null) {
                    System.out.println(line);
                }
            }
        } finally {
            // One-shot program: release the connection on every exit path.
            conn.disconnect();
        }
    }
}
二、特点
1. 模拟浏览器请求
三、封装
package net.ittimeline.java;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Crawler helper: downloads the body of a URL with an HTTP GET request that
 * carries a desktop-browser User-Agent header.
 *
 * <p>Both methods are best-effort: any failure is reported via
 * {@link Exception#printStackTrace()} and whatever text was read before the
 * failure (possibly the empty string) is returned; no exception reaches the caller.
 */
public final class Spider {
    /** User-Agent header value sent with every request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36";

    /** Charset name used by {@link #getMsg(String)}. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Fetches {@code urlPath} and returns the response body decoded with {@code encoding}.
     *
     * <p>The body is read line by line and the lines are concatenated
     * <em>without</em> separators, so line terminators are not preserved.
     *
     * @param urlPath  absolute HTTP or HTTPS URL to fetch
     * @param encoding charset name used to decode the response, e.g. {@code "UTF-8"}
     * @return the text read so far; empty if the request failed before any line was read
     */
    public static String getMsg(String urlPath, String encoding) {
        StringBuilder sb = new StringBuilder();
        try {
            URL url = new URL(urlPath);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setRequestProperty("user-agent", USER_AGENT);
            // The raw stream is its own resource so it is still closed when the
            // reader cannot be constructed (e.g. unsupported encoding name);
            // try-with-resources also closes both if reading fails midway.
            try (InputStream in = conn.getInputStream();
                 BufferedReader br = new BufferedReader(new InputStreamReader(in, encoding))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line);
                }
            }
        } catch (Exception e) {
            // Deliberate best-effort: report the failure and return what we have.
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Fetches {@code urlPath} and returns the response body decoded as UTF-8.
     *
     * @param urlPath absolute HTTP or HTTPS URL to fetch
     * @return the text read so far; empty if the request failed before any line was read
     * @see #getMsg(String, String)
     */
    public static String getMsg(String urlPath) {
        return getMsg(urlPath, DEFAULT_ENCODING);
    }
}