POI或者JXL在导出大量数据的时候,由于它们将每一个单元格生都成一个Cell对象,所以很容易导致内存溢出。解决这个问题,唯一的办法是弄清楚Excel的二进制格式(汗),并且用流的方式读写Excel。POI和JXL其实提供了二进制方式读写Excel的API,只是因为缺少文档和实例,所以使用的人不多。我编写了这个简单的合并Excel的类,它只适合合并结构相同的多个Excel文件。好在这个功能已经可以解决数据导出产生OOM的问题:将数据分批导出然后合并。 
下面的代码使用POI3.1,合并11个3000多行的文档用时约6秒,我实在找不到更多的测试用的文档了。


Java代码 

1. @SuppressWarnings("unchecked")  
2. public class XlsMergeUtil {  
3. private static Logger logger = LoggerFactory.getLogger(XlsMergeUtil.class);  
4.   
5. /**
6.    * 将多个Xls文件合并为一个,适用于只有一个sheet,并且格式相同的文档
7.    * @param inputs 输入的Xls文件
8.    * @param out 输出文件
9.    */  
10. public static void merge(InputStream[] inputs, OutputStream out) {  
11. if (inputs == null || inputs.length <= 1) {  
12. throw new IllegalArgumentException("没有传入输入流数组,或只有一个输入流.");  
13.     }  
14.   
15. 0]);  
16.     Workbook workbook = Workbook.createWorkbook(rootRecords);  
17.     List<Sheet> sheets = getSheets(workbook, rootRecords);  
18. if(sheets == null || sheets.size() == 0) {  
19. throw new IllegalArgumentException("第一篇文档的格式错误,必须有至少一个sheet");  
20.     }  
21. //以第一篇文档的最后一个sheet为根,以后的数据都追加在这个sheet后面  
22. 1);   
23. int rootRows = getRowsOfSheet(rootSheet); //记录第一篇文档的行数,以后的行数在此基础上增加  
24.     rootSheet.setLoc(rootSheet.getDimsLoc());  
25. new HashMap(10000);  
26.   
27. for (int i = 1; i < inputs.length; i++) { //从第二篇开始遍历  
28.       List<Record> records = getRecords(inputs[i]);  
29. int rowsOfCurXls = 0;  
30. //遍历当前文档的每一个record  
31. for (Iterator itr = records.iterator(); itr.hasNext();) {  
32.         Record record = (Record) itr.next();  
33. if (record.getSid() == RowRecord.sid) { //如果是RowRecord  
34.           RowRecord rowRecord = (RowRecord) record;  
35. //调整行号  
36.           rowRecord.setRowNumber(rootRows + rowRecord.getRowNumber());  
37. //追加Row  
38. //记录当前文档的行数  
39.         }  
40. //SST记录,SST保存xls文件中唯一的String,各个String都是对应着SST记录的索引  
41. else if (record.getSid() == SSTRecord.sid) {  
42.           SSTRecord sstRecord = (SSTRecord) record;  
43. for (int j = 0; j < sstRecord.getNumUniqueStrings(); j++) {  
44. int index = workbook.addSSTString(sstRecord.getString(j));  
45. //记录原来的索引和现在的索引的对应关系  
46.             map.put(Integer.valueOf(j), Integer.valueOf(index));  
47.           }  
48. else if (record.getSid() == LabelSSTRecord.sid) {  
49.           LabelSSTRecord label = (LabelSSTRecord) record;  
50. //调整SST索引的对应关系  
51.           label.setSSTIndex(map.get(Integer.valueOf(label.getSSTIndex())));  
52.         }  
53. //追加ValueCell  
54. if (record instanceof CellValueRecordInterface) {  
55.           CellValueRecordInterface cell = (CellValueRecordInterface) record;  
56. int cellRow = cell.getRow() + rootRows;  
57.           cell.setRow(cellRow);  
58.           rootSheet.addValueRecord(cellRow, cell);  
59.         }  
60.       }  
61.       rootRows += rowsOfCurXls;  
62.     }  
63. byte[] data = getBytes(workbook, sheets.toArray(new Sheet[0]));  
64.     write(out, data);  
65.   }  
66.   
67. static void write(OutputStream out, byte[] data) {  
68. new POIFSFileSystem();  
69. // Write out the Workbook stream  
70. try {  
71. new ByteArrayInputStream(data), "Workbook");  
72.       fs.writeFilesystem(out);  
73.       out.flush();  
74. catch (IOException e) {  
75.       e.printStackTrace();  
76. finally {  
77. try {  
78.         out.close();  
79. catch (IOException e) {  
80.         e.printStackTrace();  
81.       }  
82.     }  
83.   }  
84.   
85. static List<Sheet> getSheets(Workbook workbook, List records) {  
86. int recOffset = workbook.getNumRecords();  
87. int sheetNum = 0;  
88.   
89. // convert all LabelRecord records to LabelSSTRecord  
90.     convertLabelRecords(records, recOffset, workbook);  
91. new ArrayList();  
92. while (recOffset < records.size()) {  
93.       Sheet sh = Sheet.createSheet(records, sheetNum++, recOffset);  
94.   
95. 1;  
96. if (recOffset == 1) {  
97. break;  
98.       }  
99.       sheets.add(sh);  
100.     }  
101. return sheets;  
102.   }  
103.   
104. static int getRows(List<Record> records) {  
105. int row = 0;  
106. for (Iterator itr = records.iterator(); itr.hasNext();) {  
107.       Record record = (Record) itr.next();  
108. if (record.getSid() == RowRecord.sid) {  
109.         row++;  
110.       }  
111.     }  
112. return row;  
113.   }  
114.     
115. static int getRowsOfSheet(Sheet sheet) {  
116. int rows = 0;  
117. 0);  
118. while(sheet.getNextRow() != null) {  
119.       rows++;  
120.     }  
121. return rows;  
122.   }  
123.   
124. @SuppressWarnings("deprecation")  
125. static List<Record> getRecords(InputStream input) {  
126. try {  
127. new POIFSFileSystem(input);  
128. "Workbook");  
129. return org.apache.poi.hssf.record.RecordFactory.createRecords(stream);  
130. catch (IOException e) {  
131. "IO异常:{}", e.getMessage());  
132.       e.printStackTrace();  
133.     }  
134. return Collections.EMPTY_LIST;  
135.   }  
136.   
137. static void convertLabelRecords(List records, int offset, Workbook workbook) {  
138.   
139. for (int k = offset; k < records.size(); k++) {  
140.       Record rec = (Record) records.get(k);  
141.   
142. if (rec.getSid() == LabelRecord.sid) {  
143.         LabelRecord oldrec = (LabelRecord) rec;  
144.   
145.         records.remove(k);  
146. new LabelSSTRecord();  
147. int stringid = workbook.addSSTString(new UnicodeString(oldrec.getValue()));  
148.   
149.         newrec.setRow(oldrec.getRow());  
150.         newrec.setColumn(oldrec.getColumn());  
151.         newrec.setXFIndex(oldrec.getXFIndex());  
152.         newrec.setSSTIndex(stringid);  
153.         records.add(k, newrec);  
154.       }  
155.     }  
156.   }  
157.   
158. public static byte[] getBytes(Workbook workbook, Sheet[] sheets) {  
159. // HSSFSheet[] sheets = getSheets();  
160. int nSheets = sheets.length;  
161.   
162. // before getting the workbook size we must tell the sheets that  
163. // serialization is about to occur.  
164. for (int i = 0; i < nSheets; i++) {  
165.       sheets[i].preSerialize();  
166.     }  
167.   
168. int totalsize = workbook.getSize();  
169.   
170. // pre-calculate all the sheet sizes and set BOF indexes  
171. int[] estimatedSheetSizes = new int[nSheets];  
172. for (int k = 0; k < nSheets; k++) {  
173.       workbook.setSheetBof(k, totalsize);  
174. int sheetSize = sheets[k].getSize();  
175.       estimatedSheetSizes[k] = sheetSize;  
176.       totalsize += sheetSize;  
177.     }  
178.   
179. byte[] retval = new byte[totalsize];  
180. int pos = workbook.serialize(0, retval);  
181.   
182. for (int k = 0; k < nSheets; k++) {  
183. int serializedSize = sheets[k].serialize(pos, retval);  
184. if (serializedSize != estimatedSheetSizes[k]) {  
185. throw new IllegalStateException("Actual serialized sheet size (" + serializedSize  
186. ") differs from pre-calculated size (" + estimatedSheetSizes[k] + ") for sheet (" + k  
187. ")");  
188.         Sheet.serializeIndexRecord() does not  
189.       }  
190.       pos += serializedSize;  
191.     }  
192. return retval;  
193.   }  
194.   
195. public static void main(String[] args) throws Exception {  
196. final String PATH = "E:\\projects\\java\\ws_0\\export\\data\\";  
197. new InputStream[10];  
198. 0] = new java.io.FileInputStream(PATH + "07_10.xls");  
199. for(int i = 1; i <= 9; i++) {  
200. new java.io.FileInputStream(PATH + "07_0" + i + ".xls");  
201.     }  
202. new FileOutputStream(PATH + "xx.xls");  
203. long t1 = System.currentTimeMillis();  
204.     merge(inputs, out);  
205. //简陋的测试一下时间  
206.   }  
207.   
208. }


后面的跟帖


leasass 写道


讲讲我的解决方法, 
一般导出的Excel用来做报表或者统计用的,也不要求图片或其它对象,就是数据, 
我的做法是写文本格式的Excel文件,而不是用POI等生成二进制的文件, 
第一种格式,CSV,最简单的,格式最差,最基本的行列,不能合并,不能设置着色, 
第二种,HTML格式的,如:"<TABLE>....</TABLE>"这样的文本,后辍名改为XLS就可以了,可以设置跨行列的合并,可以着色,图片没试过,估计是可以的,还可以设置单元格对齐,单元格的格式等, 

写文本的时候,根本不用担心 OOM的问题,我最大写过 500多M的一个excel文件,不过这已经没有意义了,excel一个Sheet最大6万多行,多了也显示不出来. 


你的办法我也用过,的确没有溢出的问题,不过客户不同意的。我们现在的这个的客户很各色,以前其他的客户就可以用你的办法搞定。比如客户要求一次导出20w行数据,这就要求导出的文件必须带sheet,上面的代码也不支持sheet,下面的就可以了: 



Java代码 





    1. @SuppressWarnings("unchecked")  
    2. public class XlsMergeUtil {  
    3. private static Logger logger = LoggerFactory.getLogger(XlsMergeUtil.class);  
    4.   
    5. /**
    6.    * 将多个Xls文件合并为一个,适用于只有一个sheet,并且格式相同的文档
    7.    * @param inputs 输入的Xls文件,第一个XLS文件必须给出足够sheet空间
    8.    * 例如,总共200000行数据,第一个文件至少3个空白sheet
    9.    * @param out 输出文件
    10.    */  
    11. public static void merge(InputStream[] inputs, OutputStream out) {  
    12. if (inputs == null || inputs.length <= 1) {  
    13. throw new IllegalArgumentException("没有传入输入流数组,或只有一个输入流.");  
    14.     }  
    15.   
    16. 0]);  
    17.     Workbook workbook = Workbook.createWorkbook(rootRecords);  
    18.     List<Sheet> sheets = getSheets(workbook, rootRecords);  
    19. if(sheets == null || sheets.size() == 0) {  
    20. throw new IllegalArgumentException("第一篇文档的格式错误,必须有至少一个sheet");  
    21.     }  
    22. //以第一篇文档的第一个sheet为根,以后的数据都追加在这个sheet后面  
    23. 0);   
    24. int rootRows = getRowsOfSheet(rootSheet); //记录第一篇文档的行数,以后的行数在此基础上增加  
    25.     rootSheet.setLoc(rootSheet.getDimsLoc());  
    26. new HashMap(10000);  
    27. int sheetIndex = 0;  
    28.       
    29. for (int i = 1; i < inputs.length; i++) { //从第二篇开始遍历  
    30.       List<Record> records = getRecords(inputs[i]);  
    31. //达到最大行数限制,换一个sheet  
    32. if(getRows(records) + rootRows >= RowRecord.MAX_ROW_NUMBER) {  
    33. if((++sheetIndex) > (sheets.size() - 1)) {  
    34. "第一个文档给出的sheets小于需要的数量,部分数据未能合并.");  
    35. break;  
    36.         }  
    37.         rootSheet = sheets.get(sheetIndex);  
    38.         rootRows = getRowsOfSheet(rootSheet);  
    39.         rootSheet.setLoc(rootSheet.getDimsLoc());  
    40. "切换Sheet{}", sheetIndex);  
    41.       }  
    42. int rowsOfCurXls = 0;  
    43. //遍历当前文档的每一个record  
    44. for (Iterator itr = records.iterator(); itr.hasNext();) {  
    45.         Record record = (Record) itr.next();  
    46. if (record.getSid() == RowRecord.sid) { //如果是RowRecord  
    47.           RowRecord rowRecord = (RowRecord) record;  
    48. //调整行号  
    49.           rowRecord.setRowNumber(rootRows + rowRecord.getRowNumber());  
    50. //追加Row  
    51. //记录当前文档的行数  
    52.         }  
    53. //SST记录,SST保存xls文件中唯一的String,各个String都是对应着SST记录的索引  
    54. else if (record.getSid() == SSTRecord.sid) {  
    55.           SSTRecord sstRecord = (SSTRecord) record;  
    56. for (int j = 0; j < sstRecord.getNumUniqueStrings(); j++) {  
    57. int index = workbook.addSSTString(sstRecord.getString(j));  
    58. //记录原来的索引和现在的索引的对应关系  
    59.             map.put(Integer.valueOf(j), Integer.valueOf(index));  
    60.           }  
    61. else if (record.getSid() == LabelSSTRecord.sid) {  
    62.           LabelSSTRecord label = (LabelSSTRecord) record;  
    63. //调整SST索引的对应关系  
    64.           label.setSSTIndex(map.get(Integer.valueOf(label.getSSTIndex())));  
    65.         }  
    66. //追加ValueCell  
    67. if (record instanceof CellValueRecordInterface) {  
    68.           CellValueRecordInterface cell = (CellValueRecordInterface) record;  
    69. int cellRow = cell.getRow() + rootRows;  
    70.           cell.setRow(cellRow);  
    71.           rootSheet.addValueRecord(cellRow, cell);  
    72.         }  
    73.       }  
    74.       rootRows += rowsOfCurXls;  
    75.     }  
    76.       
    77. byte[] data = getBytes(workbook, sheets.toArray(new Sheet[0]));  
    78.     write(out, data);  
    79.   }  
    80.   
    81. static void write(OutputStream out, byte[] data) {  
    82. new POIFSFileSystem();  
    83. // Write out the Workbook stream  
    84. try {  
    85. new ByteArrayInputStream(data), "Workbook");  
    86.       fs.writeFilesystem(out);  
    87.       out.flush();  
    88. catch (IOException e) {  
    89.       e.printStackTrace();  
    90. finally {  
    91. try {  
    92.         out.close();  
    93. catch (IOException e) {  
    94.         e.printStackTrace();  
    95.       }  
    96.     }  
    97.   }  
    98.   
    99. static List<Sheet> getSheets(Workbook workbook, List records) {  
    100. int recOffset = workbook.getNumRecords();  
    101. int sheetNum = 0;  
    102.   
    103. // convert all LabelRecord records to LabelSSTRecord  
    104.     convertLabelRecords(records, recOffset, workbook);  
    105. new ArrayList();  
    106. while (recOffset < records.size()) {  
    107.       Sheet sh = Sheet.createSheet(records, sheetNum++, recOffset);  
    108.   
    109. 1;  
    110. if (recOffset == 1) {  
    111. break;  
    112.       }  
    113.       sheets.add(sh);  
    114.     }  
    115. return sheets;  
    116.   }  
    117.   
    118. static int getRows(List<Record> records) {  
    119. int row = 0;  
    120. for (Iterator itr = records.iterator(); itr.hasNext();) {  
    121.       Record record = (Record) itr.next();  
    122. if (record.getSid() == RowRecord.sid) {  
    123.         row++;  
    124.       }  
    125.     }  
    126. return row;  
    127.   }  
    128.     
    129. static int getRowsOfSheet(Sheet sheet) {  
    130. int rows = 0;  
    131. 0);  
    132. while(sheet.getNextRow() != null) {  
    133.       rows++;  
    134.     }  
    135. return rows;  
    136.   }  
    137.   
    138. @SuppressWarnings("deprecation")  
    139. static List<Record> getRecords(InputStream input) {  
    140. try {  
    141. new POIFSFileSystem(input);  
    142. "Workbook");  
    143. return org.apache.poi.hssf.record.RecordFactory.createRecords(stream);  
    144. catch (IOException e) {  
    145. "IO异常:{}", e.getMessage());  
    146.       e.printStackTrace();  
    147.     }  
    148. return Collections.EMPTY_LIST;  
    149.   }  
    150.   
    151. static void convertLabelRecords(List records, int offset, Workbook workbook) {  
    152.   
    153. for (int k = offset; k < records.size(); k++) {  
    154.       Record rec = (Record) records.get(k);  
    155.   
    156. if (rec.getSid() == LabelRecord.sid) {  
    157.         LabelRecord oldrec = (LabelRecord) rec;  
    158.   
    159.         records.remove(k);  
    160. new LabelSSTRecord();  
    161. int stringid = workbook.addSSTString(new UnicodeString(oldrec.getValue()));  
    162.   
    163.         newrec.setRow(oldrec.getRow());  
    164.         newrec.setColumn(oldrec.getColumn());  
    165.         newrec.setXFIndex(oldrec.getXFIndex());  
    166.         newrec.setSSTIndex(stringid);  
    167.         records.add(k, newrec);  
    168.       }  
    169.     }  
    170.   }  
    171.   
    172. public static byte[] getBytes(Workbook workbook, Sheet[] sheets) {  
    173. // HSSFSheet[] sheets = getSheets();  
    174. int nSheets = sheets.length;  
    175.   
    176. // before getting the workbook size we must tell the sheets that  
    177. // serialization is about to occur.  
    178. for (int i = 0; i < nSheets; i++) {  
    179.       sheets[i].preSerialize();  
    180.     }  
    181.   
    182. int totalsize = workbook.getSize();  
    183. // pre-calculate all the sheet sizes and set BOF indexes  
    184. int[] estimatedSheetSizes = new int[nSheets];  
    185. for (int k = 0; k < nSheets; k++) {  
    186.       workbook.setSheetBof(k, totalsize);  
    187. int sheetSize = sheets[k].getSize();  
    188.       estimatedSheetSizes[k] = sheetSize;  
    189.       totalsize += sheetSize;  
    190.     }  
    191. "分配内存{}bytes", totalsize);  
    192. byte[] retval = new byte[totalsize];  
    193. int pos = workbook.serialize(0, retval);  
    194.   
    195. for (int k = 0; k < nSheets; k++) {  
    196. int serializedSize = sheets[k].serialize(pos, retval);  
    197. if (serializedSize != estimatedSheetSizes[k]) {  
    198. // Wrong offset values have been passed in the call to setSheetBof() above.  
    199. // For books with more than one sheet, this discrepancy would cause excel  
    200. // to report errors and loose data while reading the workbook  
    201. throw new IllegalStateException("Actual serialized sheet size (" + serializedSize  
    202. ") differs from pre-calculated size (" + estimatedSheetSizes[k] + ") for sheet (" + k  
    203. ")");  
    204. // TODO - add similar sanity check to ensure that Sheet.serializeIndexRecord() does not  
    205. // write mis-aligned offsets either  
    206.       }  
    207.       pos += serializedSize;  
    208.     }  
    209. return retval;  
    210.   }  
    211.   
    212. public static void main(String[] args) throws Exception {  
    213. final String PATH = "E:\\projects\\java\\ws_0\\export\\data\\";  
    214. new InputStream[25];  
    215. 0] = new java.io.FileInputStream(PATH + "07_10.xls");  
    216. for(int i = 1; i < 25 ; i++) {  
    217. new java.io.FileInputStream(PATH + "07_01.xls");  
    218.     }  
    219. new FileOutputStream(PATH + "xx.xls");  
    220. long t1 = System.currentTimeMillis();  
    221.     merge(inputs, out);  
    222.     System.out.println(System.currentTimeMillis() - t1);  
    223.   }  
    224.   
    225. }