目录
二、工具类
三、工具方法
介绍:《众志成城抗肺炎,程序猿也发挥大作用》
二、工具类
1、HttpPojo.java
用于在发起爬取请求时,模拟请求头(header)
import java.io.Serializable;
/**
 * Serializable holder for the HTTP header values used when imitating a
 * browser request.
 *
 * <p>Every property is a plain string that starts out as {@code null};
 * the accessors store and return the values without any validation.
 *
 * <p>Created by yjl on 2019/5/30.
 */
public class HttpPojo implements Serializable {

    private static final long serialVersionUID = -2019661705306735496L;

    private String httpIp;
    private String httpHost;
    private String httpAccept;
    private String httpConnection;
    private String httpUserAgent;
    private String httpReferer;
    private String httpOrigin;
    private String httpCookie;
    private String httpContentType;

    // ---- ip ----

    /** @return the stored IP value, or {@code null} if none was set */
    public String getHttpIp() {
        return this.httpIp;
    }

    /** @param httpIp the IP value to store */
    public void setHttpIp(String httpIp) {
        this.httpIp = httpIp;
    }

    // ---- host ----

    /** @return the stored host value, or {@code null} if none was set */
    public String getHttpHost() {
        return this.httpHost;
    }

    /** @param httpHost the host value to store */
    public void setHttpHost(String httpHost) {
        this.httpHost = httpHost;
    }

    // ---- accept ----

    /** @return the stored accept value, or {@code null} if none was set */
    public String getHttpAccept() {
        return this.httpAccept;
    }

    /** @param httpAccept the accept value to store */
    public void setHttpAccept(String httpAccept) {
        this.httpAccept = httpAccept;
    }

    // ---- connection ----

    /** @return the stored connection value, or {@code null} if none was set */
    public String getHttpConnection() {
        return this.httpConnection;
    }

    /** @param httpConnection the connection value to store */
    public void setHttpConnection(String httpConnection) {
        this.httpConnection = httpConnection;
    }

    // ---- user agent ----

    /** @return the stored user-agent value, or {@code null} if none was set */
    public String getHttpUserAgent() {
        return this.httpUserAgent;
    }

    /** @param httpUserAgent the user-agent value to store */
    public void setHttpUserAgent(String httpUserAgent) {
        this.httpUserAgent = httpUserAgent;
    }

    // ---- referer ----

    /** @return the stored referer value, or {@code null} if none was set */
    public String getHttpReferer() {
        return this.httpReferer;
    }

    /** @param httpReferer the referer value to store */
    public void setHttpReferer(String httpReferer) {
        this.httpReferer = httpReferer;
    }

    // ---- origin ----

    /** @return the stored origin value, or {@code null} if none was set */
    public String getHttpOrigin() {
        return this.httpOrigin;
    }

    /** @param httpOrigin the origin value to store */
    public void setHttpOrigin(String httpOrigin) {
        this.httpOrigin = httpOrigin;
    }

    // ---- cookie ----

    /** @return the stored cookie value, or {@code null} if none was set */
    public String getHttpCookie() {
        return this.httpCookie;
    }

    /** @param httpCookie the cookie value to store */
    public void setHttpCookie(String httpCookie) {
        this.httpCookie = httpCookie;
    }

    // ---- content type ----

    /** @return the stored content-type value, or {@code null} if none was set */
    public String getHttpContentType() {
        return this.httpContentType;
    }

    /** @param httpContentType the content-type value to store */
    public void setHttpContentType(String httpContentType) {
        this.httpContentType = httpContentType;
    }
}
用法:
/**
 * Usage sample: prepares an {@link HttpPojo} carrying browser-like header
 * values for the hard-coded ncov.dxy.cn page.
 *
 * <p>As written, the method only fills in the header holder; it sends no
 * request and does not use the prepared values.
 *
 * @return always {@code null} in this sample
 */
public static String getStatisticsService() {
    String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";

    // Header values that imitate a real browser request.
    HttpPojo headers = new HttpPojo();

    // Where the request claims to come from.
    headers.setHttpOrigin("https://ncov.dxy.cn");
    headers.setHttpReferer("https://ncov.dxy.cn");
    headers.setHttpHost("ncov.dxy.cn");

    // Generic browser-style negotiation headers.
    headers.setHttpUserAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36");
    headers.setHttpConnection("keep-alive");
    headers.setHttpAccept("*/*");

    return null;
}
三、工具方法
1、httpSendGet()方法
是一个http请求方法,用于请求第三方网站数据
/**
 * Sends an HTTP GET request and returns the response body as text.
 *
 * <p>The query string comes from {@code parseParam(paramObj)} and is appended
 * to {@code url} after a {@code '?'}. A random address from {@code randIP()}
 * is sent in several forwarding-style headers; the remaining headers are
 * taken from {@code httpPojo}.
 *
 * <p>The body is decoded as UTF-8. Every line read is prefixed with
 * {@code "\n"}, so a non-empty result starts with a line break. I/O failures
 * (including timeouts) are not propagated: the stack trace is printed and
 * whatever was read up to that point, possibly the empty string, is returned.
 *
 * @param url      target address without a query string
 * @param paramObj query parameters passed to {@code parseParam}
 * @param httpPojo header values to send; must not be {@code null}
 * @return the response text, or the part that was read before a failure
 */
private static String httpSendGet(String url, Map<?, ?> paramObj, HttpPojo httpPojo) {
    // StringBuilder avoids re-copying the whole body for every line read.
    StringBuilder body = new StringBuilder();
    String urlName = url + "?" + parseParam(paramObj);
    try {
        URLConnection conn = new URL(urlName).openConnection();

        // Bound both phases so an unresponsive server cannot block the caller forever.
        conn.setConnectTimeout(15000);
        conn.setReadTimeout(30000);

        // Pretend the request was forwarded on behalf of a random client address.
        String ip = randIP();
        System.out.println("目前伪造的ip:"+ip);
        conn.setRequestProperty("X-Forwarded-For", ip);
        conn.setRequestProperty("HTTP_X_FORWARDED_FOR", ip);
        conn.setRequestProperty("HTTP_CLIENT_IP", ip);
        conn.setRequestProperty("REMOTE_ADDR", ip);

        // NOTE(review): the JDK's HttpURLConnection treats some headers (e.g. Host,
        // Origin) as restricted and may drop them unless
        // sun.net.http.allowRestrictedHeaders is enabled - confirm they reach the server.
        conn.setRequestProperty("Host", httpPojo.getHttpHost());
        conn.setRequestProperty("accept", httpPojo.getHttpAccept());
        conn.setRequestProperty("connection", httpPojo.getHttpConnection());
        conn.setRequestProperty("user-agent", httpPojo.getHttpUserAgent());
        conn.setRequestProperty("Referer", httpPojo.getHttpReferer()); // claimed referring page
        conn.setRequestProperty("Origin", httpPojo.getHttpOrigin());   // claimed origin
        conn.connect();

        // try-with-resources closes the reader on every path, including failures mid-read.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "utf-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                body.append('\n').append(line);
            }
        }
    } catch (IOException e) {
        // Best effort by design: report the problem and return what was read so far.
        e.printStackTrace();
    }
    return body.toString();
}
2、parseParam()方法
解析map的方法:将map中的每个key和value以key=value的形式用&拼接成请求参数字符串
/**
 * Turns a map into query-string text of the form {@code key=value&key=value&}.
 *
 * <p>Entries are written in the map's iteration order, and every entry,
 * including the last one, is followed by {@code '&'}; that trailing separator
 * is kept so existing callers see unchanged output. Keys and values are
 * written with their string form and are NOT URL-encoded. A {@code null}
 * value is written as an empty value ({@code key=&}) instead of failing.
 *
 * @param paramObj the parameters; may be {@code null} or empty
 * @return the concatenated text, or {@code ""} when there is nothing to write
 */
public static String parseParam(Map<?, ?> paramObj) {
    if (paramObj == null || paramObj.isEmpty()) {
        return "";
    }
    StringBuilder param = new StringBuilder();
    // entrySet gives key and value in one pass, without a second lookup per key.
    for (Map.Entry<?, ?> entry : paramObj.entrySet()) {
        param.append(entry.getKey()).append('=');
        Object value = entry.getValue();
        if (value != null) {
            param.append(value);
        }
        param.append('&');
    }
    return param.toString();
}
3、randIP()方法
伪造ip地址方法
/**
 * Builds a random dotted-quad address string for use as a forged client IP.
 *
 * <p>Each of the four parts is drawn independently from 1 to 255 inclusive.
 * No check is made for reserved or private address ranges.
 *
 * @return text of the form {@code a.b.c.d}
 */
public static String randIP() {
    // Do not seed with the clock: calls within the same millisecond would then
    // all produce the same address. The no-arg constructor picks a seed that
    // differs between invocations.
    Random random = new Random();
    StringBuilder ip = new StringBuilder(15);
    for (int part = 0; part < 4; part++) {
        if (part > 0) {
            ip.append('.');
        }
        ip.append(random.nextInt(255) + 1);
    }
    return ip.toString();
}
4、getRegContent()方法
通过正则获取指定数据
/**
 * Extracts one capture group from the last place where a regular expression
 * matches the given text.
 *
 * <p>All matches are scanned in order and each one overwrites the previous
 * result, so the value returned belongs to the final match.
 *
 * @param reg     the regular expression to compile
 * @param content the text to search
 * @param index   the capture group number to read ({@code 0} is the whole match)
 * @return the requested group of the last match, or {@code ""} when the
 *         expression does not match at all
 */
public static String getRegContent(String reg, String content, int index) {
    Matcher hits = Pattern.compile(reg).matcher(content);
    if (!hits.find()) {
        return "";
    }
    String lastGroup;
    do {
        // Keep overwriting so that only the final match survives.
        lastGroup = hits.group(index);
    } while (hits.find());
    return lastGroup;
}