Multi-threaded Export of Large Excel Files
- Foreword
For exports of 100,000+ rows, the general approach is:
- Export asynchronously (@Async plus @EnableAsync);
- Split the data and, using multiple threads, package every 20,000 rows into one Excel file;
- Compress these Excel files into a single ZIP stream and upload it to Alibaba Cloud OSS;
- Once the ZIP file has been generated, notify the user of success or failure via MQ or another messaging channel.
Note: this article focuses on using multiple threads to export large Excel files without writing them to disk, producing a ZIP stream, and uploading it.
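Before diving into the code, here is a minimal sketch of how these steps can be wired together. The ExportTaskService class and the OssUploader / ExportNotifier interfaces are hypothetical placeholders (they are not part of the original code); bigDataExport is the utility built in the next section.

import com.alibaba.excel.metadata.BaseRowModel;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.List;

// Hypothetical collaborators: any OSS client wrapper and notification channel (e.g. MQ) will do.
interface OssUploader { String upload(String objectKey, InputStream stream); }
interface ExportNotifier { void notifySuccess(String userId, String url); void notifyFailure(String userId, String reason); }

@Service
public class ExportTaskService {

    private final OssUploader ossUploader;
    private final ExportNotifier notifier;

    public ExportTaskService(OssUploader ossUploader, ExportNotifier notifier) {
        this.ossUploader = ossUploader;
        this.notifier = notifier;
    }

    /**
     * Runs on a background thread; requires @EnableAsync on a configuration class.
     */
    @Async
    public <E> void exportAndUpload(List<E> rows, String fileName,
                                    Class<? extends BaseRowModel> rowModel, String userId) {
        try {
            // Build the ZIP stream in memory (bigDataExport is defined in the next section).
            ByteArrayInputStream zipStream = bigDataExport.bigDataExport(rows, fileName, rowModel);
            // Upload the stream to OSS, then tell the user where to download it.
            String url = ossUploader.upload(fileName + ".zip", zipStream);
            notifier.notifySuccess(userId, url);
        } catch (Exception e) {
            notifier.notifyFailure(userId, e.getMessage());
        }
    }
}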
- Work: multi-threaded big-data export with easyExcel
Pom.xml
Maven + JDK 1.8 + POI + easyExcel
<!--xls-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<!--xlsx-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>2.2.6</version>
</dependency>
The code is as follows.
Generate the Excel byte streams with multiple threads:
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.metadata.BaseRowModel;
import com.alibaba.excel.metadata.Sheet;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.write.metadata.holder.WriteWorkbookHolder;
import lombok.extern.log4j.Log4j;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
/**
 * Export worker task: writes one chunk of data to an in-memory .xlsx
 * and stores the resulting bytes in the shared map.
 */
@Log4j
public class PageThreadPool<E> implements Runnable {
private final CountDownLatch countDownLatch;
private String title;
private Class<? extends BaseRowModel> clazz;
private Map<String, byte[]> byteList;
private List<E> list;
public PageThreadPool(CountDownLatch countDownLatch, String title, List<E> list, Class<? extends BaseRowModel> clazz, Map<String, byte[]> byteList) {
this.countDownLatch = countDownLatch;
this.title = title;
this.list = list;
this.clazz = clazz;
this.byteList = byteList;
}
@Override
public void run() {
ByteArrayOutputStream bos = null;
ExcelWriter writer = null;
Workbook workbook;
try {
// EasyExcel 1.x-compatible API: one sheet per file, headers taken from the row model class.
Sheet sheet1 = new Sheet(1, 0, clazz);
sheet1.setSheetName("sheet1");
ByteArrayOutputStream out = new ByteArrayOutputStream();
writer = new ExcelWriter(out, ExcelTypeEnum.XLSX);
writer.write(list, sheet1);
workbook = writer.writeContext().getWorkbook();
WriteWorkbookHolder holder = writer.writeContext().writeWorkbookHolder();
holder.setAutoCloseStream(true);
// Serialize the workbook into its own buffer so the bytes can be collected per file.
bos = new ByteArrayOutputStream();
workbook.write(bos);
bos.flush();
out.flush();
out.close();
// Store the file name and the file's byte array in the shared map.
byteList.put(this.title, bos.toByteArray());
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (bos != null) {
bos.close();
}
if (writer != null) {
writer.writeContext().finish(true);
}
} catch (IOException e) {
e.printStackTrace();
}
if (countDownLatch != null) {
countDownLatch.countDown();
}
}
}
}
Split the data by the maximum rows per Excel file and size the thread pool from the resulting number of files:
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.metadata.BaseRowModel;
import com.alibaba.excel.metadata.Sheet;
import com.alibaba.excel.support.ExcelTypeEnum;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.*;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class bigDataExport {
/**
 * Maximum number of rows per Excel file.
 */
private static final Integer BIG_EXCEL_ROWS = 20000;
/**
 * Excel export entry point.
 *
 * @param list         data rows
 * @param titleSerialN base name for the generated Excel files
 * @param object       row model class (column headers and styles)
 * @param <E>          row type
 * @return a ByteArrayInputStream over the resulting ZIP
 */
public static <E> ByteArrayInputStream bigDataExport(List<E> list, String titleSerialN, Class<? extends BaseRowModel> object) {
ByteArrayInputStream is;
byte[] buffer;
// Written to concurrently by the worker threads, so use a thread-safe map (a plain HashMap is not safe here).
Map<String, byte[]> byteList = new ConcurrentHashMap<>();
// Only use multiple threads above this row count.
if (list.size() > BIG_EXCEL_ROWS) {
// Number of full pages.
int pageNum = list.size() / BIG_EXCEL_ROWS;
// Remaining rows.
int lastCount = list.size() % BIG_EXCEL_ROWS;
// Total number of files, e.g. 60,200 rows at 20,000 per file -> pageNum = 3, lastCount = 200, page = 4.
int page = lastCount == 0 ? pageNum : pageNum + 1;
// Countdown latch: released once per finished file.
CountDownLatch downLatch = new CountDownLatch(page);
// Thread pool sized by the number of files (one worker per file).
ExecutorService executor = Executors.newFixedThreadPool(page);
List<E> subList;
for (int c = 0; c <= pageNum; c++) {
int rowNum = BIG_EXCEL_ROWS;
String title = titleSerialN + "_" + (c + 1) + ".xlsx";
if (c == pageNum) {
if (lastCount == 0) {
continue;
}
subList = list.subList(c * rowNum, c * rowNum + lastCount);
} else {
subList = list.subList(c * rowNum, (c + 1) * rowNum);
}
// Submit a worker for this chunk under a per-file name.
executor.execute(new PageThreadPool<>(downLatch, title, subList, object, byteList));
}
try {
downLatch.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
executor.shutdown();
} else {
ExcelWriter writer = null;
Workbook workbook = null;
ByteArrayOutputStream out = null;
ByteArrayOutputStream bos = null;
try {
Sheet sheet1 = new Sheet(1, 0, object);
sheet1.setSheetName("sheet1");
out = new ByteArrayOutputStream();
writer = new ExcelWriter(out, ExcelTypeEnum.XLSX);
writer.write(list, sheet1);
workbook = writer.writeContext().getWorkbook();
bos = new ByteArrayOutputStream();
workbook.write(bos);
byteList.put(titleSerialN + ".xlsx", bos.toByteArray());
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (out != null) {
out.close();
}
if (bos != null) {
bos.close();
}
if (writer != null) {
writer.writeContext().finish(true);
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
buffer = ZipUtils.zipFileSteam(byteList);
is = new ByteArrayInputStream(buffer);
return is;
}
}
Compress the generated Excel streams into a single ZIP stream:
import lombok.extern.log4j.Log4j;
import java.io.*;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
 * ZIP compression utility.
 */
@Log4j
public class ZipUtils {
/**
 * Compress a set of in-memory files into a ZIP held in memory.
 *
 * @param byteList map of file bytes, key: fileName, value: byte[]
 * @return the ZIP as a byte array
 */
public static byte[] zipFileSteam(Map<String, byte[]> byteList) {
byte[] buffer = null;
ByteArrayOutputStream bos = null;
try {
bos = new ByteArrayOutputStream();
ZipOutputStream zipOutputStream = new ZipOutputStream(bos);
byteList.forEach((k, v) -> {
// Each entry needs a name, which becomes the file name inside the archive.
try {
zipOutputStream.putNextEntry(new ZipEntry(k));
zipOutputStream.write(v);
} catch (IOException e) {
// The original logged via project-specific helpers (StringUtil/LogConstant/JSONUtil); plain logging is enough here.
log.error("Failed to write zip entry: " + k, e);
}
});
// Finish the last entry and close the archive.
zipOutputStream.closeEntry();
zipOutputStream.close();
buffer = bos.toByteArray();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (bos != null) {
bos.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
return buffer;
}
}
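The byte[] returned above is convenient for the OSS-upload flow, but when the ZIP is returned directly over HTTP you can stream the entries to the response instead of buffering the whole archive. A minimal sketch, assuming a standard Servlet environment (ZipResponseWriter and its wiring are illustrative, not part of the original code):

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

public class ZipResponseWriter {

    /** Streams the generated Excel files as a ZIP download without building the archive in memory first. */
    public static void writeZip(Map<String, byte[]> byteList, String zipFileName, HttpServletResponse response) throws IOException {
        response.setContentType("application/zip");
        response.setHeader("Content-Disposition", "attachment; filename=\"" + zipFileName + ".zip\"");
        try (ZipOutputStream zos = new ZipOutputStream(response.getOutputStream())) {
            for (Map.Entry<String, byte[]> entry : byteList.entrySet()) {
                // One ZIP entry per in-memory Excel file.
                zos.putNextEntry(new ZipEntry(entry.getKey()));
                zos.write(entry.getValue());
                zos.closeEntry();
            }
        }
    }
}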
Row-model (style) entity class:
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.annotation.write.style.ColumnWidth;
import com.alibaba.excel.metadata.BaseRowModel;
import io.swagger.annotations.ApiModelProperty;
import lombok.Data;
import java.io.Serializable;
import java.math.BigDecimal;
@Data
public class style extends BaseRowModel implements Serializable {
/**
 * Date, formatted YYYY/MM/DD HH:MM:SS.
 */
@ExcelProperty(value = "Time", index = 0)
@ColumnWidth(12)
@ApiModelProperty(value = "Time")
private String dateTime;
@ExcelProperty(value = "Transaction time", index = 1)
@ColumnWidth(12)
@ApiModelProperty(value = "Transaction time")
private String dealDatetime;
@ExcelProperty(value = "Personal account", index = 2)
@ColumnWidth(15)
@ApiModelProperty(value = "Personal account")
private String accountName;
@ExcelProperty(value = "Group name", index = 3)
@ColumnWidth(15)
@ApiModelProperty(value = "Group name")
private String groupName;
}
Invocation:
public static void main(String[] args) {
/**
 * list  - the data rows queried from the database
 * is    - the resulting ZIP as an input stream
 * style - the row-model class used for column headers and styles
 */
// Base name for the generated files; the ".xlsx" and ".zip" suffixes are appended by the export and zip utilities.
String titleSerialN = "export_archive";
List<style> list = new ArrayList<>();
InputStream is = bigDataExport(list, titleSerialN, style.class);
System.out.println("is = " + is);
}
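To check the result locally, the returned stream can simply be copied to a file. A small sketch (the output path and the empty list are only placeholders):

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

public class SaveZipExample {
    public static void main(String[] args) throws Exception {
        // Build the ZIP stream exactly as in the invocation above.
        InputStream is = bigDataExport.bigDataExport(new java.util.ArrayList<style>(), "export_archive", style.class);
        // Copy the in-memory ZIP to disk so it can be opened and inspected.
        Files.copy(is, Paths.get("export_archive.zip"), StandardCopyOption.REPLACE_EXISTING);
    }
}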
Notes:
- With about 30,000 rows, the export works after startup, but on roughly the fourth run a WARN-level exception is thrown. It does not affect the exported result, and the stack trace does not point to a line in the application code.
- With about 600,000 rows, the first export after startup is fine, but the second run throws the same WARN-level exception. Again it does not affect the exported result, and the stack trace does not point to a line in the application code.
2022-04-01 16:44:38.885 [Finalizer] WARN com.alibaba.excel.ExcelWriter - Destroy object failed
com.alibaba.excel.exception.ExcelGenerateException: Can not close IO.
at com.alibaba.excel.context.WriteContextImpl.finish(WriteContextImpl.java:378)
at com.alibaba.excel.write.ExcelBuilderImpl.finish(ExcelBuilderImpl.java:95)
at com.alibaba.excel.ExcelWriter.finish(ExcelWriter.java:329)
at com.alibaba.excel.ExcelWriter.finalize(ExcelWriter.java:340)
at java.lang.System$2.invokeFinalize(System.java:1270)
at java.lang.ref.Finalizer.runFinalizer(Finalizer.java:102)
at java.lang.ref.Finalizer.access$100(Finalizer.java:34)
at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:217)
Caused by: java.io.IOException: Stream closed
at java.io.BufferedWriter.ensureOpen(BufferedWriter.java:116)
at java.io.BufferedWriter.flushBuffer(BufferedWriter.java:126)
at java.io.BufferedWriter.flush(BufferedWriter.java:253)
at org.apache.poi.xssf.streaming.SheetDataWriter.close(SheetDataWriter.java:127)
at org.apache.poi.xssf.streaming.SXSSFSheet.getWorksheetXMLInputStream(SXSSFSheet.java:98)
at org.apache.poi.xssf.streaming.SXSSFWorkbook.injectData(SXSSFWorkbook.java:389)
at org.apache.poi.xssf.streaming.SXSSFWorkbook.write(SXSSFWorkbook.java:936)
at com.alibaba.excel.context.WriteContextImpl.finish(WriteContextImpl.java:339)
... 7 common frames omitted
Workaround: in the finally block, call finish(true) on the writeContext to suppress the exception thrown while closing; see the worker code above.
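For reference, these are the cleanup lines from the worker's finally block above; explicitly finishing the write context with finish(true) is the workaround that suppresses the "Can not close IO" warning:

} finally {
    try {
        if (bos != null) {
            bos.close();
        }
        if (writer != null) {
            // Per the workaround above: calling finish(true) here suppresses the close
            // exception that the ExcelWriter finalizer would otherwise log as a WARN.
            writer.writeContext().finish(true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    if (countDownLatch != null) {
        countDownLatch.countDown();
    }
}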
- Demo: multi-threaded big-data Excel export
Pom.xml
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.4</version>
</dependency>
Code
Compute the number of Excel files from the size of the data source and size the thread pool accordingly:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
public class BigDataExport {
public static void main(String[] args) {
int size = 70__0000;
List<Integer> list = new ArrayList<>();
for (int i = 0; i < size; i++) {
list.add(1);
}
// Base name for the generated Excel files.
String title = "GLOOX-SCATTER-CHART_SXSSFW_";
// Directory for the generated ZIP.
String zipFilePath = "/Users/fico/Downloads/";
// ZIP file name (without extension).
String zipFileName = "ZIP";
long timeMillis = System.currentTimeMillis();
System.out.println("Start [Excel export + ZIP] at (ms): " + timeMillis);
bigDataExport(list, title, zipFilePath, zipFileName);
System.out.println("Finished [Excel export + ZIP], elapsed (ms): " + (System.currentTimeMillis() - timeMillis));
}
/**
 * Default rows per Excel file: 20,000.
 */
public static final int BIG_EXCEL_ROWS = 2__0000;
public static <E> void bigDataExport(List<E> list, String titleSerialN, String zipFilePath, String zipFileName) {
long timeMillis = System.currentTimeMillis();
System.out.println("Start Excel export at (ms): " + timeMillis);
// Written to concurrently by the worker threads, so use a thread-safe map (a plain HashMap is not safe here).
Map<String, byte[]> byteList = new ConcurrentHashMap<>();
// Only use multiple threads above this row count.
if (list.size() > BIG_EXCEL_ROWS) {
// Number of full pages.
int pageNum = list.size() / BIG_EXCEL_ROWS;
// Remaining rows.
int lastCount = list.size() % BIG_EXCEL_ROWS;
// Total number of files.
int page = lastCount == 0 ? pageNum : pageNum + 1;
// Countdown latch: released once per finished file.
CountDownLatch downLatch = new CountDownLatch(page);
// Thread pool sized by the number of files (one worker per file); for very large exports,
// consider capping this at Runtime.getRuntime().availableProcessors().
ExecutorService executor = Executors.newFixedThreadPool(page);
List<E> subList;
for (int c = 0; c <= pageNum; c++) {
int rowNum = BIG_EXCEL_ROWS;
String title = titleSerialN + "_" + (c + 1) + ".xlsx";
if (c == pageNum) {
if (lastCount == 0) {
continue;
}
subList = list.subList(c * rowNum, c * rowNum + lastCount);
} else {
subList = list.subList(c * rowNum, (c + 1) * rowNum);
}
// Submit a worker for this chunk under a per-file name.
executor.execute(new PageThreadPool<>(downLatch, title, subList, byteList));
}
try {
downLatch.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
executor.shutdown();
}
System.out.println("结束 导出excel 时间为:" + (System.currentTimeMillis() - timeMillis));
if (byteList != null) {
ZipUtils.zipFileSteam(byteList, zipFilePath, zipFileName);
}
}
}
Use multiple threads plus SXSSFWorkbook (essential for large Excel files) to generate the Excel streams and put them into a Map<file name, file bytes>:
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
/**
 * Export worker task: writes one chunk of data to an in-memory .xlsx via SXSSF
 * and stores the resulting bytes in the shared map.
 */
public class PageThreadPool<E> implements Runnable {
private CountDownLatch countDownLatch;
private String title;
private Map<String, byte[]> byteList;
private List<E> list;
public PageThreadPool(CountDownLatch countDownLatch, String title, List<E> list, Map<String, byte[]> byteList) {
this.countDownLatch = countDownLatch;
this.title = title;
this.list = list;
this.byteList = byteList;
}
@Override
public void run() {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
SXSSFWorkbook workbook = null;
try {
// Keeps a sliding window of 100 rows in memory by default; older rows are flushed to temp files.
workbook = new SXSSFWorkbook();
// Compress the temp files - important, otherwise the disk fills up quickly.
workbook.setCompressTempFiles(true);
SXSSFSheet sheet = workbook.createSheet("sheet");
// The demo ignores the actual list contents and fills each row with random numbers.
Random random = new Random();
for (int j = 0; j < list.size(); j++) {
SXSSFRow row = sheet.createRow(j);
for (int k = 0; k < 10; k++) {
row.createCell(k).setCellValue(random.nextInt(10));
}
}
workbook.write(bos);
// Store the file name and the file's byte array in the shared map.
byteList.put(this.title, bos.toByteArray());
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (bos != null) {
bos.close();
}
// Dispose of the SXSSF temp files - important, otherwise the disk may fill up.
if (workbook != null) {
workbook.dispose();
}
} catch (IOException e) {
e.printStackTrace();
}
if (countDownLatch != null) {
countDownLatch.countDown();
}
}
}
}
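As a side note on SXSSF, the in-memory row window can also be set explicitly to trade memory for fewer temp-file flushes. A minimal, self-contained sketch (the window size of 500 and the output file name are illustrative choices, not from the original code):

import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import java.io.FileOutputStream;

public class SxssfWindowExample {
    public static void main(String[] args) throws Exception {
        // Keep up to 500 rows in memory before flushing older rows to compressed temp files.
        SXSSFWorkbook workbook = new SXSSFWorkbook(500);
        workbook.setCompressTempFiles(true);
        workbook.createSheet("sheet").createRow(0).createCell(0).setCellValue("hello");
        try (FileOutputStream out = new FileOutputStream("window-demo.xlsx")) {
            workbook.write(out);
        } finally {
            // Always dispose to delete the temp files backing the workbook.
            workbook.dispose();
        }
    }
}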
将 Map<K,V> 压缩成ZIP 流并生成本地文件
import java.io.*;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
public class ZipUtils {
/**
 * Compress the in-memory files into a ZIP on disk.
 *
 * @param byteList    map of file bytes, key: fileName, value: byte[]
 * @param zipFilePath directory in which to create the ZIP
 * @param zipFileName ZIP file name (without extension)
 */
public static void zipFileSteam(Map<String, byte[]> byteList,String zipFilePath,String zipFileName) {
long timeMillis = System.currentTimeMillis();
System.out.println("Start ZIP generation at (ms): " + timeMillis);
// Create the target directory if it does not exist, to avoid an error when opening the file below.
File file = new File(zipFilePath);
if (!file.exists()) {
System.out.println("Directory does not exist, creating it.");
file.mkdirs();
}
try {
FileOutputStream fileOutputStream = new FileOutputStream(String.format("%s%s%s", zipFilePath, zipFileName, ".zip"));
ZipOutputStream zipOutputStream = new ZipOutputStream(fileOutputStream);
byteList.forEach((k, v) -> {
// Each entry needs a name, which becomes the file name inside the archive.
try {
zipOutputStream.putNextEntry(new ZipEntry(k));
zipOutputStream.write(v);
} catch (IOException e) {
e.printStackTrace();
}
});
// Finish the last entry and close the archive.
zipOutputStream.closeEntry();
zipOutputStream.close();
System.out.println("Finished ZIP generation, elapsed (ms): " + (System.currentTimeMillis() - timeMillis));
} catch (IOException e) {
e.printStackTrace();
}
}
/**
 * Read a file into a byte array.
 *
 * @param filePath the file to read, e.g. new File(path)
 * @return the file contents as a byte array
 */
public static byte[] File2byte(File filePath) {
byte[] buffer = null;
FileInputStream fis = null;
ByteArrayOutputStream bos = null;
try {
fis = new FileInputStream(filePath);
bos = new ByteArrayOutputStream();
byte[] b = new byte[1024];
int n;
while ((n = fis.read(b)) != -1) {
bos.write(b, 0, n);
}
fis.close();
bos.close();
buffer = bos.toByteArray();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
if (fis != null) {
fis.close();
}
if (bos != null) {
bos.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
return buffer;
}
}
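File2byte is not used by the demo itself; it is handy when an existing file on disk should be bundled into the same archive. A small usage sketch (the file paths are placeholders):

import java.io.File;
import java.util.HashMap;
import java.util.Map;

public class AddExistingFileExample {
    public static void main(String[] args) {
        Map<String, byte[]> byteList = new HashMap<>();
        // Read an existing file from disk and add it under the name it should have inside the ZIP.
        byteList.put("readme.txt", ZipUtils.File2byte(new File("/tmp/readme.txt")));
        // Write the archive to the target directory.
        ZipUtils.zipFileSteam(byteList, "/tmp/", "with-readme");
    }
}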
- How fast is it?
Number of rows exported: 700,200
Start [Excel export + ZIP] at (ms): 1649140344506
Start Excel export at (ms): 1649140344506
Finished Excel export, elapsed (ms): 9581
Start ZIP generation at (ms): 1649140354097
Finished ZIP generation, elapsed (ms): 2258
Finished [Excel export + ZIP], elapsed (ms): 11849