近期做的一个项目由于客户需求,需要将网站的首页静态化。因为自己从未接触过静态化的相关知识,所以只好硬着头皮到处找资料,焦头烂额。最后想到一种解决方案:用爬虫技术把自己的首页抓取下来,然后用抓取到的整个页面替换掉首页,这样用户访问的就是一个静态资源了。百度了一下,发现果然有这样的案例,果断改改拿来用了,写此博客来记录一下。
package com.evcipa.comutil;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.output.FileWriterWithEncoding;
import org.apache.log4j.Logger;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Generates static HTML snapshots of dynamically rendered pages.
 *
 * <p>Three independent strategies are offered:
 * <ul>
 *   <li>{@link #createHtmlPage(String, String)} - plain HTTP GET through Commons HttpClient,
 *       followed by relative-path rewriting; no JavaScript is executed.</li>
 *   <li>{@link #getHtmlPage(String, String)} - renders the page with HtmlUnit so that
 *       JavaScript/AJAX-produced markup is part of the snapshot.</li>
 *   <li>{@link #sendGet(String)} - plain HTTP GET through {@code HttpURLConnection} that
 *       returns the body as a string instead of writing a file.</li>
 * </ul>
 *
 * <p>All per-request state is kept in local variables, so one instance may be used for
 * several requests; the only instance state is the configured {@code webappname}.
 *
 * @Commpany BK
 * @author ZhangAn
 */
public class HtmlGenerator {

    private static final Logger logger = Logger.getLogger(HtmlGenerator.class);

    /** Charset used for every read and write performed by this class. */
    private static final String ENCODING = "UTF-8";

    /** Connection/read timeout handed to HtmlUnit, in milliseconds. */
    private static final int WEB_CLIENT_TIMEOUT_MILLIS = 50000;

    /** Upper bound on how long HtmlUnit waits for background JavaScript, in milliseconds. */
    private static final long BACKGROUND_JS_WAIT_MILLIS = 20000;

    /**
     * Prefix that replaces leading {@code ../} sequences in fetched pages.
     * When {@code null}, {@link #formatPage(String)} leaves the page untouched.
     */
    String webappname = null;

    /** Creates a generator that does not rewrite relative paths. */
    public HtmlGenerator() {
    }

    /**
     * Creates a generator that rewrites relative paths against the given prefix.
     *
     * @param webappname prefix substituted for {@code ../}, {@code ../../} and
     *                   {@code ../../../}; may be {@code null} to disable rewriting
     */
    public HtmlGenerator(String webappname) {
        this.webappname = webappname;
    }

    /**
     * Fetches {@code url} with an HTTP GET and stores the (path-rewritten) body as a
     * UTF-8 file.
     *
     * @param url          address of the page to snapshot
     * @param htmlFileName path of the file to create or overwrite
     * @return {@code true} when the server answered 200 and the file was written;
     *         {@code false} on any other status code or on any error (which is logged)
     */
    public boolean createHtmlPage(String url, String htmlFileName) {
        if (url == null || htmlFileName == null) {
            logger.error("createHtmlPage requires a non-null url and htmlFileName");
            return false;
        }

        boolean status = false;
        GetMethod getMethod = null;
        try {
            // HttpClient plays the role of the browser.
            HttpClient httpClient = new HttpClient();
            // Charset HttpClient falls back to when decoding content.
            httpClient.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, ENCODING);

            getMethod = new GetMethod(url);
            // Default recovery policy: retry automatically (3 attempts) on recoverable errors.
            getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                    new DefaultHttpMethodRetryHandler());
            // Declares the request charset so that non-ASCII parameters are passed correctly.
            getMethod.addRequestHeader("Content-Type", "text/html;charset=UTF-8");

            // 200 means success; every other status code is treated as a failure.
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != 200) {
                logger.error("静态页面引擎在解析"+url+"产生静态页面"+htmlFileName+"时出错!"
                        + " HTTP status: " + statusCode);
            } else {
                InputStream in = getMethod.getResponseBodyAsStream();
                if (in == null) {
                    throw new IOException("response for " + url + " has no body");
                }
                // The body is decoded explicitly as UTF-8; the platform default charset
                // garbles non-ASCII text.
                String page = readBody(in);
                // Turn relative paths into absolute ones so page resources still resolve.
                page = formatPage(page);
                writeHtml(htmlFileName, page);
                status = true;
            }
        } catch (Exception ex) {
            // Pass the exception itself so that the stack trace reaches the log.
            logger.error("静态页面引擎在解析"+url+"产生静态页面"+htmlFileName+"时出错:"+ex.getMessage(), ex);
        } finally {
            // getMethod is still null when the failure happened before it was created.
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
        return status;
    }

    /**
     * Reads the whole stream as UTF-8 text, terminating every line with {@code \n}.
     * The stream is closed before returning.
     */
    private static String readBody(InputStream in) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
        try {
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
            return body.toString();
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Writes {@code content} to {@code htmlFileName} as UTF-8, replacing any existing file.
     *
     * @throws IOException when the file cannot be opened or written
     */
    private synchronized void writeHtml(String htmlFileName, String content) throws IOException {
        OutputStreamWriter writer =
                new OutputStreamWriter(new FileOutputStream(htmlFileName), ENCODING);
        try {
            writer.write(content);
            // Flush inside the try block so that a failed write surfaces as an exception
            // instead of being hidden by the quiet close below.
            writer.flush();
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Replaces {@code ../../../}, {@code ../../} and {@code ../} (longest first) with
     * {@code webappname + "/"}. The page is returned unchanged when no
     * {@code webappname} was configured.
     */
    private String formatPage(String page) {
        if (webappname == null) {
            return page;
        }
        // String.replace is literal: '$' and '\' in the prefix carry no special meaning.
        String prefix = webappname + "/";
        page = page.replace("../../../", prefix);
        page = page.replace("../../", prefix);
        page = page.replace("../", prefix);
        return page;
    }

    /**
     * Renders {@code url} with HtmlUnit (JavaScript, CSS and AJAX enabled) and writes the
     * resulting DOM, serialized as XML, to a file.
     *
     * @param url      address of the page to render
     * @param rootPath path of the output file (despite the name, a file, not a directory)
     * @return {@code true} when the page was rendered and written; {@code false} otherwise
     */
    public static boolean getHtmlPage(String url, String rootPath) {
        WebClient multiWebClient = new WebClient(BrowserVersion.CHROME);
        try {
            multiWebClient.getOptions().setJavaScriptEnabled(true);
            multiWebClient.getOptions().setCssEnabled(true);
            // Re-synchronizes AJAX calls so their results are present in the snapshot.
            multiWebClient.setAjaxController(new NicelyResynchronizingAjaxController());
            multiWebClient.getOptions().setTimeout(WEB_CLIENT_TIMEOUT_MILLIS);

            HtmlPage htmlPage = multiWebClient.getPage(url);
            // Give background scripts time to finish before the DOM is serialized.
            multiWebClient.waitForBackgroundJavaScript(BACKGROUND_JS_WAIT_MILLIS);
            return stringToFile(htmlPage.asXml(), rootPath);
        } catch (Exception e) {
            logger.error("Failed to render " + url + " into " + rootPath + ": " + e.getMessage(), e);
            return false;
        } finally {
            multiWebClient.closeAllWindows();
        }
    }

    /**
     * Writes {@code html} to the file at {@code rootPath} using UTF-8.
     *
     * @param html     text to write
     * @param rootPath path of the output file
     * @return {@code true} on success; {@code false} when an argument is {@code null} or
     *         the file cannot be written (the cause is logged)
     */
    public static boolean stringToFile(String html, String rootPath) {
        if (html == null || rootPath == null) {
            logger.error("stringToFile requires non-null html and rootPath");
            return false;
        }
        FileWriterWithEncoding fileWriter = null;
        try {
            fileWriter = new FileWriterWithEncoding(rootPath, ENCODING);
            fileWriter.write(html);
            fileWriter.flush();
            return true;
        } catch (IOException e) {
            logger.error("Failed to write " + rootPath + ": " + e.getMessage(), e);
            return false;
        } finally {
            closeQuietly(fileWriter);
        }
    }

    /**
     * Issues an HTTP GET and returns the response body decoded as UTF-8.
     *
     * <p>Lines are concatenated <em>without</em> line separators. On failure the error is
     * logged and whatever had been read up to that point (possibly the empty string) is
     * returned.
     *
     * @param url complete request URL; any query string must already be URL-encoded
     * @return the response body with line breaks removed, never {@code null}
     */
    public static String sendGet(String url) {
        StringBuilder result = new StringBuilder();
        BufferedReader in = null;
        try {
            java.net.URL connURL = new java.net.URL(url);
            java.net.HttpURLConnection httpConn =
                    (java.net.HttpURLConnection) connURL.openConnection();
            // Generic request headers.
            httpConn.setRequestProperty("Accept", "*/*");
            httpConn.setRequestProperty("Connection", "Keep-Alive");
            httpConn.setRequestProperty("User-Agent",
                    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)");
            httpConn.connect();

            in = new BufferedReader(new InputStreamReader(httpConn.getInputStream(), ENCODING));
            String line;
            while ((line = in.readLine()) != null) {
                result.append(line);
            }
        } catch (Exception e) {
            logger.error("GET " + url + " failed: " + e.getMessage(), e);
        } finally {
            closeQuietly(in);
        }
        return result.toString();
    }

    /** Closes {@code resource} if it is non-null; a failure to close is only logged. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done at this point; record it and move on.
            logger.warn("Failed to close resource: " + ignored.getMessage());
        }
    }

    /** Manual smoke test: snapshots the local index page to {@code D:/a.html}. */
    public static void main(String[] args) {
        HtmlGenerator h = new HtmlGenerator("webappname");
        boolean generated =
                h.createHtmlPage("http://localhost:8080/evcipa/views/index.jsp", "D:/a.html");
        if (generated) {
            System.out.println("静态页面已经生成到D:/a.html");
        } else {
            System.err.println("静态页面生成失败,详见日志");
        }
    }
}
后续处理
由于首页已经变成了静态页面,内容发生变化时不会像动态页面那样自动重新生成,所以需要一个定时任务定期执行上面的生成逻辑,用最新的动态内容覆盖静态文件。