- 最近项目需求,有最低十万的数据导入最高500万的数据导入需求,poi,是开源对Excel支持非常强大的框架,因此研究了一番,此过程借阅网上多为人士的代码,和见解,我发现网上的代码都是那一套,是一个前辈在github上的一个开源项目,我也借阅了,非常感谢前辈提供.
- 我在借阅的时候发现假如一行中有单元格为空的话,会发生数据前移的情况,从而我们就无法准确的和数据库中数据对应,
- 第三就是其代码风格和我的不像,因此我研究了一番,下面说说我的心得
- 对于word 2007,其实际上就是一堆的xml的压缩包,你可以把xlsx的后缀改为.zip解压就会看到,找到workbook文件夹打开,找到sheet1.xml打开,发现表格数据以xml写入在里面,其中有各种标签,每个标签的含义大家可以自己去摸索,这里不再多少,因为这个你自己设置单元格各种类型就会出现各种标签,所以不再多说
- 好了废话少说,开始说代码
- 第一:准备工作
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/xerces/xercesImpl -->
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.9.1</version>
</dependency>
注意:${poi.version}就是版本号,这个版本号大家可以自己去maven官网查看,找一个比较新的,使用人数比较多的
- 第二,对于大数据Excel,2007是基于handle来处理,下面给出我写handle类
package com.rjhcsoft.credit.utils.poi.handle;
import com.rjhcsoft.credit.utils.StringUtil;
import com.rjhcsoft.credit.utils.poi.callback.Callback;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class ExcelXlsxHandle extends DefaultHandler {
private CellDataType nextDataType=CellDataType.SSTINDEX;;
private int formatIndex;
private String formatString;
private SharedStringsTable sst;
/**
* 单元格中的数据可能的数据类型
*/
enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
private boolean isAvailabledOfRow = false;// 是否是有效行
private int availabledRows = 0;
private int totalRows = 0;
private int currentRowNum = 0;
private Map<String, String> cellMap = null;
private String key;
private String lastIndex;
private Callback callback;
public ExcelXlsxHandle(Callback callback, XSSFReader reader) {
this.callback = callback;
try {
sst=reader.getSharedStringsTable();
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException(e);
} catch (InvalidFormatException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if ("row".equalsIgnoreCase(qName)) {// 如果是行元素
totalRows++;// 总行数+1
String r = attributes.getValue("r");// 获取行号
currentRowNum = Integer.parseInt(r);
cellMap= new HashMap<>();
isAvailabledOfRow=false;
} else if ("c".equalsIgnoreCase(qName)) {// 如果是单元格
key = getKey(attributes);// 获取键值
cellMap.put(key, null);// 先放入map,单此时值为null
this.setNextDataType(attributes);
}
}
private String getKey(Attributes attributes){
return attributes.getValue("r").replaceAll("\\d*","");
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
// super.characters(ch, start, length);
lastIndex = new String(ch, start, length);
}
@Override
public void endElement(String uri, String localName, String qName) throws SAXException {
//super.endElement(uri, localName, qName);
if ("v".equalsIgnoreCase(qName)) {// 如果是值标签
String value = this.getDataValue(lastIndex.trim());
if (!StringUtil.isEmpty(value)) isAvailabledOfRow=true;
cellMap.put(key, value);// 重设值
} else if ("c".equalsIgnoreCase(qName)) {
key = null;// key置位null
lastIndex = null;// lastIndex置位null
} else if ("row".equalsIgnoreCase(qName)) {// 如果row是结束标签,说明一行结束
if (isAvailabledOfRow) {// 如果是有效行
availabledRows++;// 是有效行则有效行数+1
callback.callback(cellMap,currentRowNum,availabledRows);// 回调,将结果输送给客户端,让客户端处理
}
}
}
/**
* 处理数据类型
*
* @param attributes
*/
public void setNextDataType(Attributes attributes) {
nextDataType = CellDataType.NUMBER; //cellType为空,则表示该单元格类型为数字
formatIndex = -1;
formatString = null;
String cellType = attributes.getValue("t"); //单元格类型
String cellStyleStr = attributes.getValue("s"); //
String columnData = attributes.getValue("r"); //获取单元格的位置,如A1,B1
if ("b".equals(cellType)) { //处理布尔值
nextDataType = CellDataType.BOOL;
} else if ("e".equals(cellType)) { //处理错误
nextDataType = CellDataType.ERROR;
} else if ("inlineStr".equals(cellType)) {
nextDataType = CellDataType.INLINESTR;
} else if ("s".equals(cellType)) { //处理字符串
nextDataType = CellDataType.SSTINDEX;
} else if ("str".equals(cellType)) {
nextDataType = CellDataType.FORMULA;
}
}
/**
* 对解析出来的数据进行类型处理
* @param value 单元格的值,
* value代表解析:BOOL的为0或1, ERROR的为内容值,FORMULA的为内容值,INLINESTR的为索引值需转换为内容值,
* SSTINDEX的为索引值需转换为内容值, NUMBER为内容值,DATE为内容值
* @return
*/
@SuppressWarnings("deprecation")
public String getDataValue(String value) {
String thisStr = null;
switch (nextDataType) {
// 这几个的顺序不能随便交换,交换了很可能会导致数据错误
case BOOL: //布尔值
thisStr=value;
break;
case ERROR: //错误
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA: //公式
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
rtsi = null;
break;
case SSTINDEX: //字符串
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));//根据idx索引值获取内容值
thisStr = rtss.toString();
rtss = null;
} catch (NumberFormatException ex) {
thisStr = value.toString();
}
break;
case NUMBER: //数字
thisStr=value;
thisStr = thisStr.replace("_", "").trim();
break;
case DATE: //日期
thisStr=value;
break;
default:
thisStr = value;
break;
}
return thisStr;
}
public static void main(String[] args){
System.out.println("AA123".replaceAll("\\d*",""));
}
}
注意:这里处理Excel的值得时候,全部当成字符串来处理的,不管你是时间,还是数字还是其他,直接拿的原值,所以如果单元格是日期类型,你拿到的值其实是从1900年1月1日到今天的天数,是一个double类型,转换后续会说,现在就是拿原始值,所有的转换不在这里处理,如果有兴趣,也可以在这里处理
- ExcelUtil
package com.rjhcsoft.credit.utils.poi;
import com.rjhcsoft.credit.utils.StringUtil;
import com.rjhcsoft.credit.utils.poi.annos.Workbook;
import com.rjhcsoft.credit.utils.poi.callback.Callback;
import com.rjhcsoft.credit.utils.poi.handle.ExcelXlsxHandle;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.ZipPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
public class ExcelUtil {
/**
* 只拿取第一个sheet
* @param callback
* @param file ,
*/
public static void readFirst(File file,Callback callback) {
XSSFReader reader = getXSSFReader(file);
XMLReader parser = getXMLReader(callback,reader);
Iterator<InputStream> sheetsData = getSheetsData(reader);
parseFirst(sheetsData,parser);
}
public static void readFirst(String path,Callback callback){
readFirst(new File(path),callback);
}
public static void readAll(File file,Callback callback){
XSSFReader reader = getXSSFReader(file);
XMLReader parser = getXMLReader(callback, reader);
Iterator<InputStream> sheetsData = getSheetsData(reader);
parseAll(sheetsData,parser);
}
public static void readAll(String path,Callback callback){
readAll(new File(path),callback);
}
public static <T> T resultToObj(Map<String,String> result,Class<T> clazz) {
try {
T t = clazz.newInstance();
Field[] fields = clazz.getDeclaredFields();
for (Field field:fields){
field.setAccessible(true);
Workbook workbook = field.getDeclaredAnnotation(Workbook.class);
if (workbook!=null){
String cell = workbook.cell();
String value = result.get(cell);
if (!StringUtil.isEmpty(value)){
value=value.trim();
if (field.getType()==String.class){
field.set(t,value);
}else if (field.getType()==Byte.class){
field.set(t,Byte.parseByte(value));
}else if (field.getType()==Short.class){
field.set(t,Short.parseShort(value));
}else if (field.getType()==Integer.class){
field.set(t,Integer.parseInt(value));
}else if (field.getType()==Long.class){
field.set(t,Long.parseLong(value));
}else if (field.getType()==Float.class){
field.set(t,Float.parseFloat(value));
}else if (field.getType()==Double.class){
field.set(t,Double.parseDouble(value));
}else if (field.getType()==Boolean.class){
field.set(t,value.equalsIgnoreCase("0")?false:true);
}else if (field.getType()==Character.class){
field.set(t,value.charAt(0));
}else if (field.getType()== BigDecimal.class){
field.set(t,new BigDecimal(value));
}else if (field.getType()== Date.class){
try {
double v = Double.parseDouble(value);
long m = (long) (v*24*60*60*1000);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
long n = Math.abs(sdf.parse("1900-00-30").getTime());
field.set(t,new Date(m-n));
} catch (Exception e) {
String format = workbook.format();
try {
field.set(t,new SimpleDateFormat(format).parse(value));
} catch (ParseException e1) {
e1.printStackTrace();
throw new RuntimeException(e1);
}
}
}
}
}
field.setAccessible(false);
}
return t;
} catch (InstantiationException e) {
e.printStackTrace();
throw new RuntimeException(e);
} catch (IllegalAccessException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
private static void parseAll(Iterator<InputStream> sheetsData, XMLReader parser) {
while (sheetsData.hasNext()){
parse(sheetsData,parser);
}
}
private static void parse(Iterator<InputStream> sheetsData, XMLReader parser){
try(InputStream inputStream = sheetsData.next()) {
parser.parse(new InputSource(inputStream));
}catch (Exception e){
e.printStackTrace();
throw new RuntimeException(e);
}
}
private static void parseFirst(Iterator<InputStream> sheetsData,XMLReader parser){
if (sheetsData.hasNext()){
parse(sheetsData,parser);
}
}
private static XSSFReader getXSSFReader(File file){
if (!file.getName().endsWith(".xlsx")) throw new RuntimeException("请使用word 2007的Excel格式,即xlsx格式");
XSSFReader reader = null;
try {
reader = new XSSFReader(ZipPackage.open(file));
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException(e);
} catch (OpenXML4JException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
return reader;
}
private static XMLReader getXMLReader(Callback callback, XSSFReader reader){
XMLReader parser = null;
try {
parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
} catch (SAXException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
parser.setContentHandler(new ExcelXlsxHandle(callback,reader));
return parser;
}
private static Iterator<InputStream> getSheetsData(XSSFReader reader){
try {
return reader.getSheetsData();
} catch (IOException e) {
e.printStackTrace();
throw new RuntimeException(e);
} catch (InvalidFormatException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
public static void main(String[] args) throws ParseException {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT);
Date parse = sdf.parse("1970-1-1 00:00:00");
System.out.println(parse.getTime());
}
}
- Callback
package com.rjhcsoft.credit.utils.poi.callback;
import java.util.Map;
public interface Callback {
void callback(Map<String,String> result,int currentRowNumber,int availabledRows);
}
- workbook
package com.rjhcsoft.credit.utils.poi.annos;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
public @interface Workbook {
String cell();
String format() default "";
}
- ThreadPool
package com.rjhcsoft.credit.utils.thread.pool;
import java.util.concurrent.Semaphore;
/**
* 封装线程池
*/
public abstract class BaseThreadPool {
private Semaphore semaphore;
{
init();
}
/**
* 初始化方法,此方法会在构造方法之前,属性之后执行
*/
protected abstract void init();
/**
* 构造方法执行
*/
public BaseThreadPool(){
this(5);
}
/**
* 构造方法执行
* @param permits 并发数
*/
public BaseThreadPool(int permits){
if (permits<1) throw new RuntimeException("并发数至少为1");
semaphore = new Semaphore(permits);
afterConstructor(permits);
}
/**
* 此为核心执行方法,
* @param execute 回调接口,此为用户实现其核心执行内容
*/
public void execute(Execute execute){
new Thread(new Runnable() {
@Override
public void run() {
try {
afterInitThread();
semaphore.acquire();
beforeExecute();
execute.execute();
afterExecute();
}catch (Exception e){
e.printStackTrace();
exeception(e);
}finally {
semaphore.release();
finallz();
}
}
}).start();
}
/**
* 在构造方法执行之后执行此方法
* @param permits
*/
protected abstract void afterConstructor(int permits);
/**
* 当线程初始化完成,但是还没来得及获取线程锁的时候,执行此方法
*/
protected abstract void afterInitThread();
/**
* 在业务代码执行之前执行此方法
*/
protected abstract void beforeExecute();
/**
* 在实际业务代码执行之后,执行此方法
*/
protected abstract void afterExecute();
/**
* 当出异常时执行此方法
*/
protected abstract void exeception(Exception e);
/**
* 当整个执行业务结束,不论是否出异常,都会执行此方法
*/
protected abstract void finallz();
public interface Execute{
void execute();
}
}
package com.rjhcsoft.credit.utils.thread.pool;
import com.rjhcsoft.credit.utils.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
public class SimpleThreadPool extends BaseThreadPool {
private static Logger LOGGER = LoggerFactory.getLogger(SimpleThreadPool.class);
private int permins;
private ArrayBlockingQueue<String> queue;
private AtomicInteger total;// 线程总数
private AtomicInteger core;// 核心池中的线程数
private AtomicInteger wait;// 等待数
public SimpleThreadPool(int permits){
super(permits);
}
@Override
protected void afterConstructor(int permits) {
this.permins=permits;
}
@Override
protected void init() {
queue = new ArrayBlockingQueue<String>(100);
total = new AtomicInteger(0);
core = new AtomicInteger(0);
wait = new AtomicInteger(0);
}
@Override
protected void afterInitThread() {
total.addAndGet(1);
wait.addAndGet(1);
String threadId = StringUtil.uuid();
Thread.currentThread().setName(threadId);
LOGGER.debug("线程["+threadId+"]初始化完成");
}
@Override
protected void beforeExecute() {
String name = Thread.currentThread().getName();
LOGGER.debug("线程["+name+"]进入核心池...");
wait.addAndGet(-1);
core.addAndGet(1);
String uuid = StringUtil.uuid();
try {
queue.put(uuid);
} catch (InterruptedException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
@Override
protected void afterExecute() {
core.addAndGet(-1);
}
@Override
protected void exeception(Exception e) {
throw new RuntimeException(e);
}
@Override
protected void finallz() {
String poll = queue.poll();
String threadId = Thread.currentThread().getName();
LOGGER.debug("线程["+threadId+"]出去了");
}
public void callback(Callback callback){
callback.callback(total.get(),core.get(),wait.get());
}
/**
* 获取总数,调用此方法,程序会进入500毫秒等待,然后判定是否有等待线程,如果有则递归,如果没有则返回
* @return
*/
public int getTotal() throws InterruptedException {
Thread.currentThread().sleep(500);
if (queue.isEmpty()) return total.get();
else return getTotal();
}
public interface Callback{
void callback(int total,int core,int wait);
}
}
- 最后测试,这里不支持word 2003格式,因为其需要监听器处理,而其监听器是全sheet遍历,不能指定sheet,所以放弃集成
- 测试代码:
package com.rjhcsoft.credit;
import com.alibaba.fastjson.annotation.JSONField;
import com.rjhcsoft.credit.utils.poi.annos.Workbook;
import java.math.BigDecimal;
import java.util.Date;
public class A {
@Workbook(cell = "A")
private Integer a;
@Workbook(cell = "B")
private Long b;
@Workbook(cell = "C")
private Double c;
@Workbook(cell = "D")
private Boolean d;
@Workbook(cell = "E")
private String e;
@Workbook(cell = "F",format = "yyyy-MM-dd")
@JSONField(format = "yyyy-MM-dd")
private Date f;
@Workbook(cell = "G")
private BigDecimal g;
@Workbook(cell = "H",format = "yyyy-MM-dd HH:mm:ss")
@JSONField(format = "yyyy-MM-dd HH:mm:ss")
private Date h;
public Date getH() {
return h;
}
public void setH(Date h) {
this.h = h;
}
public Integer getA() {
return a;
}
public void setA(Integer a) {
this.a = a;
}
public Long getB() {
return b;
}
public void setB(Long b) {
this.b = b;
}
public Double getC() {
return c;
}
public void setC(Double c) {
this.c = c;
}
public Boolean getD() {
return d;
}
public void setD(Boolean d) {
this.d = d;
}
public String getE() {
return e;
}
public void setE(String e) {
this.e = e;
}
public Date getF() {
return f;
}
public void setF(Date f) {
this.f = f;
}
public BigDecimal getG() {
return g;
}
public void setG(BigDecimal g) {
this.g = g;
}
}
@Autowired
private JdbcTemplate jdbcTemplate;
@Test
public void testThreadPool() throws InterruptedException {
ArrayBlockingQueue<Map<String,String>> queue = new ArrayBlockingQueue<>(100);
new Thread(new Runnable() {
@Override
public void run() {
ExcelUtil.readFirst("C:\\Users\\Administrator\\Desktop\\新建XLSX 工作表.xlsx", new Callback() {
@Override
public void callback(Map<String, String> map, int currentRowNumber, int availabledRows) {
try {
queue.put(map);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
});
}
}).start();
SimpleThreadPool pool = new SimpleThreadPool(5);
boolean flag = true;
int m = 0;
while (flag){
Map<String, String> map = queue.poll();
if (map==null||map.isEmpty()){
m++;
if (m>100) flag=false;
else
Thread.currentThread().sleep(10);
}else {
m = 0;
A a = ExcelUtil.resultToObj(map, A.class);
pool.execute(new BaseThreadPool.Execute() {
@Override
public void execute() {
jdbcTemplate.update("insert into T_TEST(a,b,c,d,e,f,g,h) values (?,?,?,?,?,?,?,?)",
a.getA(),a.getB(),a.getC(),a.getD(),a.getE(),a.getF(),a.getG(),a.getH());
}
});
}
}
}
- 测试数据库表:
CREATE TABLE `T_TEST` (
`a` int(11) DEFAULT NULL,
`b` bigint(20) DEFAULT NULL,
`c` double DEFAULT NULL,
`d` tinyint(1) DEFAULT NULL,
`e` varchar(255) DEFAULT NULL,
`f` datetime DEFAULT NULL,
`g` decimal(10,0) DEFAULT NULL,
`h` datetime DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
至此:结束!有什么意见的可以提出,大家多交流,