同学突然有个蛋疼的需求,抓取以下界面的数据至数据库,我因为很闲,更加蛋疼的自告奋勇帮忙完成。
主要分为3部分:
1)从网页获取数据。
2)将获取的数据实体list集合中去。
3)存入数据库中。
从网页获取数据代码如下(获取到的是xml格式数据):
public class GetHttpXml {
public static List<Metal> captureJavascript() throws Exception {
String strURL = "http://www.shmet.com/Template/_Template.html?viewName=_HomeSpotPrice&metalid=10133%2C10131%2C10132%2C10002%2C10003%2C10134%2C10135&_=1453249939502";
URL url = new URL(strURL);
HttpURLConnection httpConn = (HttpURLConnection) url.openConnection();
InputStreamReader input = new InputStreamReader(httpConn
.getInputStream(), "utf-8");
BufferedReader bufReader = new BufferedReader(input);
String line = "";
StringBuilder contentBuf = new StringBuilder();
while ((line = bufReader.readLine()) != null) {
contentBuf.append(line);
}
DomParseService dom = new DomParseService();
List<Metal> dataList = dom.getData(contentBuf.toString().trim());
for(Metal metal : dataList){
System.out.println(metal.getChange() + "/ " + metal.getDate() + "/ " + metal.getName() + "/ " + metal.getPrice() + "/ " + metal.getUpWater());
}
return dataList;
}
这大部分是网上当的,基本内容一次性成功,所以不作说明。
接下来解析xml,我使用的是dom4j解析,由于格式不大友好,还是折腾我一段时间的,代码如下:
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
public class DomParseService {
List<Metal> listMetal = new ArrayList<Metal>();
public List<Metal> getData(String inputStream) throws Exception{
Document document = DocumentHelper.parseText(inputStream);
//获取根节点元素对象
Element rootElement = document.getRootElement();
// 获取tbody节点。
Element element = rootElement.element("tbody");
Iterator<Element> iteratorTbody = element.elementIterator();
while(iteratorTbody.hasNext()){
Element eleTr = iteratorTbody.next();
if(eleTr.attributeCount() > 0){
Iterator<Element> iteratorTr = eleTr.elementIterator();
Metal metal = new Metal();
while(iteratorTr.hasNext()){
Element eleTd = iteratorTr.next();
// System.out.println(eleTd.getText() + " ---- " + eleTd.attributeValue("class") + "\n\t");
if(eleTd.attributeCount() == 0){
metal.setUpWater(eleTd.getText());
}
else if(eleTd.attributeCount() > 0)
{
// System.out.println(eleTd.getText() + " ---- " + eleTd.attributeValue("class").trim() + "\n\t");
if(eleTd.attributeValue("class").indexOf("name") != -1){
metal.setName(eleTd.getText());
}
else if(eleTd.attributeValue("class").indexOf("price") != -1){
metal.setPrice(eleTd.getText());
}
else if(eleTd.attributeValue("class").indexOf("change") != -1){
Iterator<Element> iteratorFont = eleTd.elementIterator();
while(iteratorFont.hasNext()){
Element eleFont = iteratorFont.next();
metal.setChange(eleFont.getText());
}
}
else if(eleTd.attributeValue("class").indexOf("today") != -1){
metal.setDate(eleTd.getText());
}
}
// System.out.println(metal.getChange() + "/ " + metal.getDate() + "/ " + metal.getName() + "/ " + metal.getPrice() + "/ " + metal.getUpWater() + "\n\t");
}
listMetal.add(metal);
}
}
return listMetal;
}
}
在上面有个疑惑:如eleTd.attributeValue("class").indexOf("price") != -1就是想判断属性class的值是否包含price字段,原先用eleTd.attributeValue("class").Trim() == "price"没有判断成功,eleTd.attributeValue("class").Trim()应该不包含别的字符,怎么会出错呢?
PS:我是用dom4j解析的,需要导入dom4j.jar包。
最后就是将数据导入数据库中,使用的是网上搜到的工具:
PS:这个也需要导包,从微软上下sqljdbc.jar包
import java.lang.reflect.*;
import java.sql.*;
import java.util.*;
public class SqlHelper {
// SQL Server
/**
* JDBC驱动名称
*/
public static final String CLASS_NAME = "com.microsoft.sqlserver.jdbc.SQLServerDriver";
/**
* 数据库连库字符串
*/
public static final String URL = "jdbc:sqlserver://192.168.63.109;databaseName=HTTPCollection";
/**
* 用户名
*/
public static final String UID = "sa";
/**
* 密码
*/
public static final String PWD = "jxjxjx123";
/**
* JDBC驱动类型
*/
public static Class CLS = null;
// Oracle
// public static final String CLASS_NAME =
// "oracle.jdbc.driver.OracleDriver";
// public static final String URL =
// "jdbc:oracle:thin:@localhost:1522:accp11g";
// public static final String UID = "system";
// public static final String PWD = "manager";
/**
* 获取数据库连接对象
*
* @return
* @throws ClassNotFoundException
* @throws SQLException
*/
public static Connection getConnection() throws ClassNotFoundException,
SQLException {
if (CLS == null) {
CLS = Class.forName(CLASS_NAME);
}
return DriverManager.getConnection(URL, UID, PWD);
}
/**
* 执行SQL语句不返回查询的操作,返回受影响的行数
*
* @param sql
* SQL语句
* @return 受影响的行数
* @throws ClassNotFoundException
* @throws SQLException
*/
public static int executeNonQuery(String sql) {
int result = -1;
Connection con = null;
PreparedStatement ps = null;
try {
con = getConnection();
ps = con.prepareStatement(sql);
result = ps.executeUpdate();
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, ps, null);
}
return result;
}
/**
* 执行Insert语句,返回Insert成功之后标识列的值
*
* @param sql
* @return
* @throws ClassNotFoundException
* @throws SQLException
*/
public static int executeIdentity(String sql) {
int identity = -1;
Connection con = null;
Statement ps = null;
ResultSet rs = null;
try {
con = getConnection();
ps = con.createStatement();
ps.executeUpdate(sql, Statement.RETURN_GENERATED_KEYS);
rs = ps.getGeneratedKeys();
if (rs.next()) {
// identity = rs.getInt("GENERATED_KEYS");
identity = rs.getInt(1);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, ps, null);
}
return identity;
}
/**
* 执行不返回结果集的存储过程
*
* @param sql
* 存储过程名称
* @param params
* 存储过程参数
* @throws ClassNotFoundException
* @throws SQLException
*/
public static void executeNonQuery(String sql, SqlParameter... params) {
Connection con = null;
CallableStatement cs = null;
try {
con = getConnection();
cs = con.prepareCall(sql);
setSqlParameter(cs, params);
cs.executeUpdate();
getSqlParameter(cs, params);
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, cs, null);
}
}
/**
* 执行返回聚合函数的操作
*
* @param sql
* 含有聚合函数的SQL语句
* @return 聚合函数的执行结果
* @throws SQLException
* @throws ClassNotFoundException
*/
public static int executeScalar(String sql) {
int result = -1;
Connection con = null;
PreparedStatement ps = null;
ResultSet rs = null;
try {
con = getConnection();
ps = con.prepareStatement(sql);
rs = ps.executeQuery();
if (rs.next()) {
result = rs.getInt(1);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, ps, rs);
}
return result;
}
/**
* 执行返回泛型集合的SQL语句
*
* @param cls
* 泛型类型
* @param sql
* 查询SQL语句
* @return 泛型集合
* @throws ClassNotFoundException
* @throws SQLException
* @throws InstantiationException
* @throws IllegalAccessException
*/
public static <T> List<T> executeList(Class<T> cls, String sql) {
List<T> list = new ArrayList<T>();
Connection con = null;
PreparedStatement ps = null;
ResultSet rs = null;
try {
con = getConnection();
ps = con.prepareStatement(sql);
rs = ps.executeQuery();
while (rs.next()) {
T obj = executeResultSet(cls, rs);
list.add(obj);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, ps, rs);
}
return list;
}
/**
* 执行返回泛型集合的存储过程
*
* @param cls
* 泛型类型
* @param sql
* 存储过程名称
* @param params
* 存储过程参数
* @return 泛型集合
* @throws ClassNotFoundException
* @throws SQLException
* @throws InstantiationException
* @throws IllegalAccessException
*/
public static <T> List<T> executeList(Class<T> cls, String sql,
SqlParameter... params) {
List<T> list = new ArrayList<T>();
Connection con = null;
CallableStatement cs = null;
ResultSet rs = null;
try {
con = getConnection();
cs = con.prepareCall(sql);
setSqlParameter(cs, params);
rs = cs.executeQuery();
while (rs.next()) {
T obj = executeResultSet(cls, rs);
list.add(obj);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, cs, rs);
}
return list;
}
/**
* 执行返回泛型类型对象的SQL语句
*
* @param cls
* 泛型类型
* @param sql
* SQL语句
* @return 泛型类型对象
* @throws SQLException
* @throws ClassNotFoundException
* @throws InstantiationException
* @throws IllegalAccessException
*/
public static <T> T executeEntity(Class<T> cls, String sql) {
T obj = null;
Connection con = null;
PreparedStatement ps = null;
ResultSet rs = null;
try {
con = getConnection();
ps = con.prepareStatement(sql);
rs = ps.executeQuery();
while (rs.next()) {
obj = executeResultSet(cls, rs);
break;
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, ps, rs);
}
return obj;
}
/**
* 执行返回泛型类型对象的存储过程
*
* @param cls
* 泛型类型
* @param sql
* SQL语句
* @param params
* 存储过程参数
* @return 泛型类型对象
* @throws SQLException
* @throws ClassNotFoundException
* @throws InstantiationException
* @throws IllegalAccessException
*/
public static <T> T executeEntity(Class<T> cls, String sql,
SqlParameter... params) {
T obj = null;
Connection con = null;
CallableStatement cs = null;
ResultSet rs = null;
try {
con = getConnection();
cs = con.prepareCall(sql);
setSqlParameter(cs, params);
rs = cs.executeQuery();
while (rs.next()) {
obj = executeResultSet(cls, rs);
break;
}
} catch (Exception e) {
e.printStackTrace();
} finally {
close(con, cs, rs);
}
return obj;
}
/**
* 将一条记录转成一个对象
*
* @param cls
* 泛型类型
* @param rs
* ResultSet对象
* @return 泛型类型对象
* @throws InstantiationException
* @throws IllegalAccessException
* @throws SQLException
*/
private static <T> T executeResultSet(Class<T> cls, ResultSet rs)
throws InstantiationException, IllegalAccessException, SQLException {
T obj = cls.newInstance();
ResultSetMetaData rsm = rs.getMetaData();
int columnCount = rsm.getColumnCount();
// Field[] fields = cls.getFields();
Field[] fields = cls.getDeclaredFields();
for (int i = 0; i < fields.length; i++) {
Field field = fields[i];
String fieldName = field.getName();
for (int j = 1; j <= columnCount; j++) {
String columnName = rsm.getColumnName(j);
if (fieldName.equalsIgnoreCase(columnName)) {
Object value = rs.getObject(j);
field.setAccessible(true);
field.set(obj, value);
break;
}
}
}
return obj;
}
/**
* 设置存储过程参数名称,参数值,参数方向
*
* @param cs
* @param params
* @throws SQLException
*/
private static void setSqlParameter(CallableStatement cs,
SqlParameter... params) throws SQLException {
if (params != null) {
for (SqlParameter param : params) {
if (param.OutPut) {
String paramName = param.Name;
if (paramName == null || paramName.equals("")) {
cs.registerOutParameter(1, param.Type);// 设置返回类型参数
} else {
cs.registerOutParameter(paramName, param.Type);// 设置输出类型参数
}
} else {
cs.setObject(param.Name, param.Value);// 设置输入类型参数
}
}
}
}
/**
* 得到存储过程参数执行结果
*
* @param cs
* @param params
* @throws SQLException
*/
private static void getSqlParameter(CallableStatement cs,
SqlParameter... params) throws SQLException {
for (SqlParameter param : params) {
if (param.OutPut) {
String paramName = param.Name;
if (paramName == null || paramName.equals("")) {
param.Value = cs.getObject(1);// 返回类型参数值
} else {
param.Value = cs.getObject(paramName);// 输出类型参数值
}
}
}
}
/**
* 关闭JDBC对象,释放资源。
*
* @param con
* 连接对象
* @param ps
* 命令对象
* @param rs
* 结果集对象
* @throws SQLException
*/
private static void close(Connection con, Statement ps, ResultSet rs) {
try {
// rs.close(); //jiangxiang删除改行,当多次掉用该语句时会报空指针异常,因为rs已关闭。
if (rs != null) {
rs = null;
}
if (ps != null) {
ps.close();
ps = null;
}
if (con != null) {
con.close();
con = null;
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
/**
* 存储过程参数类型
* @author Administrator
*
*/
public class SqlParameter {
/**
* 参数名称
*/
public String Name;
/**
* 参数值
*/
public Object Value;
/**
* true表示参数为输出类型
*/
public boolean OutPut;
/**
* 参数类型
*/
public int Type;
/**
* 输入类型参数的构造函数
* @param name 存储过程 输入类型 参数名称
* @param value 存储过程 输入类型 参数值
*/
public SqlParameter(String name,Object value){
this.Name = name;
this.Value= value;
}
/**
* 输出类型参数的构造函数
* @param type 存储过程 输出类型 参数类型
* @param name 存储过程 输出类型 参数名称
*/
public SqlParameter(int type,String name){
this.Name = name;
this.OutPut = true;
this.Type = type;
}
/**
* 返回类型参数的构造函数
* @param type 存储过程 返回类型
*/
public SqlParameter(int type){
this.Name = "";
this.OutPut = true;
this.Type = type;
}
}
工具类挺好用的,不过使用的时候循环导入时报错,断点调试后发现close中有问题,将一行代码注释掉就好了。
最后测试下:
public static void main(String[] args) throws Exception {
List<Metal> list =captureJavascript();
for(Metal metal : list){
String sql = "insert into Metal([name],[price],[change],[upWater],[date]) values('" + metal.getName() + "','" + metal.getPrice() + "','" + metal.getChange() + "','" + metal.getUpWater() + "','" + metal.getDate() +"')";
int identyValue = SqlHelper.executeIdentity(sql);
}
}
成功,数据库和实体类不附上了。
利用时间做些有意义的事情。