思路

系统自定义了404页面,该页面响应的Content-Type是:text/html;charset=UTF-8。对每个附件地址发送HTTP HEAD请求,判断响应中有没有Content-Length头。

如果有Content-Length头,说明是正常的二进制附件;如果没有Content-Length头,并且Content-Type是text/html;charset=UTF-8,说明返回的是自定义404页面,即附件丢失。

import lombok.extern.slf4j.Slf4j;
import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.ssl.TrustStrategy;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;


@Slf4j
public class Main {

// Base directory for attachment.txt (input list) and error.txt (results); main() assigns it from the os.name property.
static String folder ="";

/**
 * Entry point: selects the working folder from the {@code os.name} system property,
 * loads the attachment list and checks every entry on a fixed pool of 50 threads.
 *
 * @param args unused
 * @throws IOException retained so the method signature stays unchanged
 */
public static void main(String[] args) throws IOException {
    String os = System.getProperty("os.name");
    if (os != null && os.toLowerCase().contains("linux")) {
        folder = "/home/erdpc/";
    } else {
        folder = "d:/";
    }

    Main bootstrap = new Main();
    List<String> files = bootstrap.readAttachments();

    ExecutorService executor1 = Executors.newFixedThreadPool(50);
    for (String file : files) {
        // Entries may use Windows separators; URLs need forward slashes.
        executor1.execute(new MyTask(file.replace("\\", "/")));
    }

    // Stop accepting new work. The pool's non-daemon workers keep the JVM alive until
    // every queued check has run, then exit; without this call the idle workers would
    // keep the process running forever, which is why the original had to block on stdin.
    executor1.shutdown();
}



/**
 * Checks a single attachment with an HTTP HEAD request and appends it to error.txt
 * when the server answers with the custom 404 page or the request fails.
 */
static class MyTask implements Runnable {
    private final String filename;

    /**
     * @param filename attachment path relative to the site root, using forward slashes
     */
    public MyTask(String filename) {
        this.filename = filename;
    }

    /**
     * Sends the HEAD request. A response without a Content-Length header whose
     * Content-Type is exactly {@code text/html;charset=UTF-8} is recorded as missing.
     * Any failure is printed to stderr and recorded together with its message.
     */
    @Override
    public void run() {
        String displayName = filename.replace("%20", " ");
        // Only spaces are escaped; other illegal URI characters make HttpHead throw
        // IllegalArgumentException, which is handled below.
        String uri = "https://www.ksst-erdpc.cn/".concat(filename.replace(" ", "%20"));

        // Closing the client releases its connections; the original leaked one client per task.
        try (CloseableHttpClient httpClient = buildDefaultHttpClientTrustSSL()) {
            HttpHead httpHead = new HttpHead(uri);
            httpHead.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64)spider");
            httpHead.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
            httpHead.addHeader("Accept-Encoding", "gzip, deflate");
            httpHead.addHeader("Accept-Language", "zh-CN,zh;q=0.9");

            HttpResponse response = httpClient.execute(httpHead);
            Header contentLength = response.getFirstHeader("Content-Length");
            Header contentType = response.getFirstHeader("Content-Type");
            // A response may carry no Content-Type at all; guard against the null header.
            if (contentLength == null
                    && contentType != null
                    && "text/html;charset=UTF-8".equals(contentType.getValue())) {
                write(displayName.concat("\r\n"));
            }
        } catch (IOException | RuntimeException e) {
            // getMessage() may be null; String.concat(null) would throw inside this handler.
            String message = String.valueOf(e.getMessage());
            System.err.printf("文件检测出错:%s,信息:%s\r\n", displayName, message);
            // Terminate the record so the next entry starts on its own line.
            write(displayName.concat("\r\n").concat(message).concat("\r\n"));
        }
    }

    @Override
    public String toString() {
        return "MyTask [filename=" + filename + "]";
    }
}


/**
 * Appends the given text to error.txt in the working folder, creating the file if it
 * does not exist yet. Synchronized because the checker tasks call it from many threads.
 *
 * @param url text to append; callers supply their own line breaks
 */
private static synchronized void write(String url) {
    try {
        Files.write(
                Paths.get(folder.concat("error.txt")),
                // Explicit UTF-8 matches Files.readAllLines, which reads the input list as UTF-8.
                url.getBytes(java.nio.charset.StandardCharsets.UTF_8),
                // APPEND alone fails with NoSuchFileException when the file is absent.
                StandardOpenOption.CREATE,
                StandardOpenOption.APPEND);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/**
 * Reads attachment.txt from the working folder and returns its lines with one leading
 * and one trailing double quote removed where present. Prints the line count to stdout.
 *
 * @return the cleaned lines, or an empty list when the file cannot be read
 */
private List<String> readAttachments() {
    final List<String> rawLines;
    try {
        rawLines = Files.readAllLines(Paths.get(folder.concat("attachment.txt")));
    } catch (IOException e) {
        e.printStackTrace();
        return Collections.emptyList();
    }

    System.out.println(rawLines.size());
    return rawLines.stream()
            .map(line -> {
                String head = line.startsWith("\"") ? line.substring(1) : line;
                return head.endsWith("\"") ? head.substring(0, head.length() - 1) : head;
            })
            .collect(Collectors.toList());
}




/**
 * Builds an HTTP client that trusts every server certificate and accepts every host name.
 * Socket, connect and connection-request timeouts are 30 seconds each and content
 * compression is enabled. If the SSL context cannot be built, the stack trace is
 * printed and {@code null} is passed to the client builder instead.
 *
 * @return the configured client
 */
public static CloseableHttpClient buildDefaultHttpClientTrustSSL() {
    TrustStrategy trustEverything = (chain, authType) -> true;
    HostnameVerifier acceptAnyHost = (hostname, session) -> true;

    SSLContext sslContext;
    try {
        sslContext = SSLContextBuilder.create()
                .useProtocol(SSLConnectionSocketFactory.SSL)
                .loadTrustMaterial(trustEverything)
                .build();
    } catch (Exception e) {
        e.printStackTrace();
        sslContext = null;
    }

    RequestConfig.Builder timeouts = RequestConfig.custom();
    timeouts.setSocketTimeout(30000);
    timeouts.setConnectTimeout(30000);
    timeouts.setConnectionRequestTimeout(30000);
    timeouts.setContentCompressionEnabled(true);
    RequestConfig config = timeouts.build();

    HttpClientBuilder builder = HttpClientBuilder.create();
    builder.setDefaultRequestConfig(config);
    builder.setSSLContext(sslContext);
    builder.setSSLHostnameVerifier(acceptAnyHost);
    return builder.build();
}

程序运行结果

服务器迁移后确认附件是否迁移成功_apache

 

 下图是文件丢失的(部分是黑客扫描数据)

服务器迁移后确认附件是否迁移成功_apache_02

参考来源:

https://stackoverflow.com/questions/4992317/illegal-character-in-path-at-index-16