POI或者JXL在导出大量数据的时候,由于它们将每一个单元格生都成一个Cell对象,所以很容易导致内存溢出。解决这个问题,唯一的办法是弄清楚Excel的二进制格式(汗),并且用流的方式读写Excel。POI和JXL其实提供了二进制方式读写Excel的API,只是因为缺少文档和实例,所以使用的人不多。我编写了这个简单的合并Excel的类,它只适合合并结构相同的多个Excel文件。好在这个功能已经可以解决数据导出产生OOM的问题:将数据分批导出然后合并。
下面的代码使用POI3.1,合并11个3000多行的文档用时约6秒,我实在找不到更多的测试用的文档了。
1. @SuppressWarnings("unchecked")
2. public class XlsMergeUtil {
3. private static Logger logger = LoggerFactory.getLogger(XlsMergeUtil.class);
4.
5. /**
6. * 将多个Xls文件合并为一个,适用于只有一个sheet,并且格式相同的文档
7. * @param inputs 输入的Xls文件
8. * @param out 输出文件
9. */
10. public static void merge(InputStream[] inputs, OutputStream out) {
11. if (inputs == null || inputs.length <= 1) {
12. throw new IllegalArgumentException("没有传入输入流数组,或只有一个输入流.");
13. }
14.
15. 0]);
16. Workbook workbook = Workbook.createWorkbook(rootRecords);
17. List<Sheet> sheets = getSheets(workbook, rootRecords);
18. if(sheets == null || sheets.size() == 0) {
19. throw new IllegalArgumentException("第一篇文档的格式错误,必须有至少一个sheet");
20. }
21. //以第一篇文档的最后一个sheet为根,以后的数据都追加在这个sheet后面
22. 1);
23. int rootRows = getRowsOfSheet(rootSheet); //记录第一篇文档的行数,以后的行数在此基础上增加
24. rootSheet.setLoc(rootSheet.getDimsLoc());
25. new HashMap(10000);
26.
27. for (int i = 1; i < inputs.length; i++) { //从第二篇开始遍历
28. List<Record> records = getRecords(inputs[i]);
29. int rowsOfCurXls = 0;
30. //遍历当前文档的每一个record
31. for (Iterator itr = records.iterator(); itr.hasNext();) {
32. Record record = (Record) itr.next();
33. if (record.getSid() == RowRecord.sid) { //如果是RowRecord
34. RowRecord rowRecord = (RowRecord) record;
35. //调整行号
36. rowRecord.setRowNumber(rootRows + rowRecord.getRowNumber());
37. //追加Row
38. //记录当前文档的行数
39. }
40. //SST记录,SST保存xls文件中唯一的String,各个String都是对应着SST记录的索引
41. else if (record.getSid() == SSTRecord.sid) {
42. SSTRecord sstRecord = (SSTRecord) record;
43. for (int j = 0; j < sstRecord.getNumUniqueStrings(); j++) {
44. int index = workbook.addSSTString(sstRecord.getString(j));
45. //记录原来的索引和现在的索引的对应关系
46. map.put(Integer.valueOf(j), Integer.valueOf(index));
47. }
48. else if (record.getSid() == LabelSSTRecord.sid) {
49. LabelSSTRecord label = (LabelSSTRecord) record;
50. //调整SST索引的对应关系
51. label.setSSTIndex(map.get(Integer.valueOf(label.getSSTIndex())));
52. }
53. //追加ValueCell
54. if (record instanceof CellValueRecordInterface) {
55. CellValueRecordInterface cell = (CellValueRecordInterface) record;
56. int cellRow = cell.getRow() + rootRows;
57. cell.setRow(cellRow);
58. rootSheet.addValueRecord(cellRow, cell);
59. }
60. }
61. rootRows += rowsOfCurXls;
62. }
63. byte[] data = getBytes(workbook, sheets.toArray(new Sheet[0]));
64. write(out, data);
65. }
66.
67. static void write(OutputStream out, byte[] data) {
68. new POIFSFileSystem();
69. // Write out the Workbook stream
70. try {
71. new ByteArrayInputStream(data), "Workbook");
72. fs.writeFilesystem(out);
73. out.flush();
74. catch (IOException e) {
75. e.printStackTrace();
76. finally {
77. try {
78. out.close();
79. catch (IOException e) {
80. e.printStackTrace();
81. }
82. }
83. }
84.
85. static List<Sheet> getSheets(Workbook workbook, List records) {
86. int recOffset = workbook.getNumRecords();
87. int sheetNum = 0;
88.
89. // convert all LabelRecord records to LabelSSTRecord
90. convertLabelRecords(records, recOffset, workbook);
91. new ArrayList();
92. while (recOffset < records.size()) {
93. Sheet sh = Sheet.createSheet(records, sheetNum++, recOffset);
94.
95. 1;
96. if (recOffset == 1) {
97. break;
98. }
99. sheets.add(sh);
100. }
101. return sheets;
102. }
103.
104. static int getRows(List<Record> records) {
105. int row = 0;
106. for (Iterator itr = records.iterator(); itr.hasNext();) {
107. Record record = (Record) itr.next();
108. if (record.getSid() == RowRecord.sid) {
109. row++;
110. }
111. }
112. return row;
113. }
114.
115. static int getRowsOfSheet(Sheet sheet) {
116. int rows = 0;
117. 0);
118. while(sheet.getNextRow() != null) {
119. rows++;
120. }
121. return rows;
122. }
123.
124. @SuppressWarnings("deprecation")
125. static List<Record> getRecords(InputStream input) {
126. try {
127. new POIFSFileSystem(input);
128. "Workbook");
129. return org.apache.poi.hssf.record.RecordFactory.createRecords(stream);
130. catch (IOException e) {
131. "IO异常:{}", e.getMessage());
132. e.printStackTrace();
133. }
134. return Collections.EMPTY_LIST;
135. }
136.
137. static void convertLabelRecords(List records, int offset, Workbook workbook) {
138.
139. for (int k = offset; k < records.size(); k++) {
140. Record rec = (Record) records.get(k);
141.
142. if (rec.getSid() == LabelRecord.sid) {
143. LabelRecord oldrec = (LabelRecord) rec;
144.
145. records.remove(k);
146. new LabelSSTRecord();
147. int stringid = workbook.addSSTString(new UnicodeString(oldrec.getValue()));
148.
149. newrec.setRow(oldrec.getRow());
150. newrec.setColumn(oldrec.getColumn());
151. newrec.setXFIndex(oldrec.getXFIndex());
152. newrec.setSSTIndex(stringid);
153. records.add(k, newrec);
154. }
155. }
156. }
157.
158. public static byte[] getBytes(Workbook workbook, Sheet[] sheets) {
159. // HSSFSheet[] sheets = getSheets();
160. int nSheets = sheets.length;
161.
162. // before getting the workbook size we must tell the sheets that
163. // serialization is about to occur.
164. for (int i = 0; i < nSheets; i++) {
165. sheets[i].preSerialize();
166. }
167.
168. int totalsize = workbook.getSize();
169.
170. // pre-calculate all the sheet sizes and set BOF indexes
171. int[] estimatedSheetSizes = new int[nSheets];
172. for (int k = 0; k < nSheets; k++) {
173. workbook.setSheetBof(k, totalsize);
174. int sheetSize = sheets[k].getSize();
175. estimatedSheetSizes[k] = sheetSize;
176. totalsize += sheetSize;
177. }
178.
179. byte[] retval = new byte[totalsize];
180. int pos = workbook.serialize(0, retval);
181.
182. for (int k = 0; k < nSheets; k++) {
183. int serializedSize = sheets[k].serialize(pos, retval);
184. if (serializedSize != estimatedSheetSizes[k]) {
185. throw new IllegalStateException("Actual serialized sheet size (" + serializedSize
186. ") differs from pre-calculated size (" + estimatedSheetSizes[k] + ") for sheet (" + k
187. ")");
188. Sheet.serializeIndexRecord() does not
189. }
190. pos += serializedSize;
191. }
192. return retval;
193. }
194.
195. public static void main(String[] args) throws Exception {
196. final String PATH = "E:\\projects\\java\\ws_0\\export\\data\\";
197. new InputStream[10];
198. 0] = new java.io.FileInputStream(PATH + "07_10.xls");
199. for(int i = 1; i <= 9; i++) {
200. new java.io.FileInputStream(PATH + "07_0" + i + ".xls");
201. }
202. new FileOutputStream(PATH + "xx.xls");
203. long t1 = System.currentTimeMillis();
204. merge(inputs, out);
205. //简陋的测试一下时间
206. }
207.
208. }
后面的跟帖
leasass 写道 讲讲我的解决方法, 你的办法我也用过,的确没有溢出的问题,不过客户不同意的。我们现在的这个的客户很各色,以前其他的客户就可以用你的办法搞定。比如客户要求一次导出20w行数据,这就要求导出的文件必须带sheet,上面的代码也不支持sheet,下面的就可以了:
|