POI生成WORD文档
POI是Java系处理Office文档的比较优秀的开源库,其中对Excel的处理最为优秀,文档也写得很详细。不过很多网友都认为它在Word文档处理方面逊色很多,尽管如此,本次的文档生成我依然选择了POI。
需要完成功能
- 配置Word模板文件,包括表格
- 解析配置的Word文档,返回配置的特殊标记
- 构造数据,替换配置的标签,以及生成表格
配置word模版
采用${xx}方式配置标签;如果是表格,则在表格的第一行第一列配置表格名称
注意:在Word文档中,如果相邻字符的样式不同,Word默认会把它们保存在不同的RUN元素中。因此很多朋友在配置好模板以后,都需要另存为一个单独的文件,然后再手工把被拆散的标签合并到同一个RUN元素中。如果文件比较大,我相信这绝对是一件比较痛苦的事情,这里将会侧重处理这个问题。我的解决方案是只保留第一个RUN的样式,其他RUN的内容删掉。
解析word模板
XWPFDocument对象可以通过流的方式打开,也可以通过OPCPackage打开。不过如果使用OPCPackage打开的方式,打开的文件和最终生成的文件不能够是同一个文件,所以我这里采用文件流的方式
/**
 * Opens the document stored at {@code saveFile} through a file stream.
 *
 * The stream is closed as soon as the document has been constructed, so no
 * file handle is left open on the file that will later be overwritten.
 *
 * @return the opened document, or {@code null} when the file could not be
 *         read (the failure is printed to standard error)
 */
public XWPFDocument openDocument() {
    try (InputStream is = new FileInputStream(saveFile)) {
        return new XWPFDocument(is);
    } catch (IOException e) {
        // Best effort, as in the original: report the problem and signal it with null.
        e.printStackTrace();
        return null;
    }
}
XWPFDocument
对象有当前所有段落以及表格,这里暂不考虑表格嵌套表格的情况,每个段落的文本信息是可以通过p.getText()
获取,获取段落中文档配置信息如下:
/**
 * Collects the tags found in the document's top-level paragraphs and in the
 * paragraphs of its ordinary (non-list) tables.
 *
 * @param doc   document to scan
 * @param regex tag pattern, forwarded unchanged to the helpers
 * @return the tags gathered by {@code setTagInfoList}, in discovery order
 */
public List<TagInfo> getWordTag(XWPFDocument doc, String regex) {
    List<TagInfo> collected = new ArrayList<TagInfo>();
    // Body paragraphs first ...
    for (XWPFParagraph bodyParagraph : doc.getParagraphs()) {
        setTagInfoList(collected, bodyParagraph, regex);
    }
    // ... then every non-empty paragraph inside the ordinary tables.
    for (XWPFTable plainTable : getDocTables(doc, false, regex)) {
        for (XWPFParagraph cellParagraph : getTableParagraph(plainTable)) {
            setTagInfoList(collected, cellParagraph, regex);
        }
    }
    return collected;
}
获取文本后通过正则解析,并依次保存到TagInfo中
/**
 * Adds every tag found in paragraph {@code p} to {@code list}, skipping tags
 * the list already contains.
 *
 * @param list  accumulator that receives the newly discovered tags
 * @param p     paragraph whose text is scanned
 * @param regex tag pattern; {@code null} or empty selects {@code defaultRegex}
 */
private void setTagInfoList(List<TagInfo> list, XWPFParagraph p,
        String regex) {
    // Compare by content: "==" on strings only matches the same object.
    if (regex == null || regex.isEmpty()) {
        regex = defaultRegex;
    }
    Matcher matcher = Pattern.compile(regex).matcher(p.getText());
    // find() resumes after the previous match and steps past zero-length
    // matches, so a pattern that can match "" cannot loop forever here.
    while (matcher.find()) {
        String match = matcher.group();
        TagInfo tag = new TagInfo(match, match, "");
        if (!list.contains(tag)) {
            list.add(tag);
        }
    }
}
解析表格
/**
 * Reads the tag configuration of every list table in the document.
 *
 * For each list table the tags of its last row are collected; the result maps
 * the table name (taken from the first cell) to a one-element list holding
 * those tags.
 *
 * @param doc   document to scan
 * @param regex tag pattern, forwarded unchanged to the helpers
 * @return map from table name to the tag rows configured for that table
 */
public Map<String, List<List<TagInfo>>> getTableTag(XWPFDocument doc,
        String regex) {
    Map<String, List<List<TagInfo>>> tagsByTable = new HashMap<String, List<List<TagInfo>>>();
    for (XWPFTable listTable : getDocTables(doc, true, regex)) {
        List<TagInfo> rowTags = new ArrayList<TagInfo>();
        // The last row of the table carries the column tags.
        int lastIndex = listTable.getRows().size() - 1;
        XWPFTableRow templateRow = listTable.getRow(lastIndex);
        for (XWPFTableCell cell : templateRow.getTableCells()) {
            for (XWPFParagraph paragraph : cell.getParagraphs()) {
                String text = paragraph.getText();
                // Skip paragraphs without any text.
                if (text == null || text.length() == 0) {
                    continue;
                }
                setTagInfoList(rowTags, paragraph, regex);
            }
        }
        List<List<TagInfo>> tagRows = new ArrayList<List<TagInfo>>();
        tagRows.add(rowTags);
        // Register the collected row under the table's name.
        tagsByTable.put(getTableListName(listTable, regex), tagRows);
    }
    return tagsByTable;
}
生成WORD文档
难点替换标签
传入数据格式包含三个formtag以及一个tableTag
{"formTags":
[{"TagName":"${xxxx}","TagText":"${xxxx}","TagValue":""},
{"TagName":"${123}","TagText":"${123}","TagValue":""},
{"TagName":"${ddd}","TagText":"${ddd}","TagValue":""}],
"tableTags":{
"${table}":[
[{"TagName":"${COL1}","TagText":"${COL1}","TagValue":""},{"TagName":"${COL2}","TagText":"${COL2}","TagValue":""}]
]}
}
POI中提供searchText方法,返回Tag所在的所有RUN标签。它是逐个字符做比较的,从找到的第一个匹配字符开始计数,所以在当前条件下类似 $${xxx} 这样的标签是无法实现替换的
替换普通文本Tag
/**
 * Replaces every occurrence of each tag in {@code tagList} inside one paragraph.
 *
 * A tag may be split over several runs. In that case the text of the involved
 * runs is joined, the replacement is written into the first run (so its style
 * is kept) and the remaining runs are emptied.
 *
 * @param tagList tags to replace ({@code TagText} is searched, {@code TagValue}
 *                is written; a null value is treated as an empty string)
 * @param para    paragraph to modify
 * @param regex   kept for interface compatibility; repeated occurrences are now
 *                handled by a bounded loop instead of a regex-driven recursion
 */
public void ReplaceInParagraph(List<TagInfo> tagList, XWPFParagraph para,
        String regex) {
    if (tagList == null || para == null) {
        return;
    }
    List<XWPFRun> runs = para.getRuns();
    for (TagInfo ti : tagList) {
        String find = ti.TagText;
        if (find == null || find.isEmpty()) {
            continue;
        }
        String replValue = ti.TagValue == null ? "" : ti.TagValue;
        // A value that contains its own tag would be found again forever.
        boolean reintroducesTag = replValue.contains(find);
        TextSegement found = para.searchText(find, new PositionInParagraph());
        // Stop as soon as a pass changes nothing: the previous unconditional
        // recursion never terminated when the paragraph held tag-like text
        // that no entry of tagList could replace.
        while (found != null
                && replaceSegment(runs, found, find, replValue)
                && !reintroducesTag) {
            found = para.searchText(find, new PositionInParagraph());
        }
    }
}

/**
 * Rewrites the runs covered by one search hit.
 *
 * @return true when the run text was actually changed
 */
private boolean replaceSegment(List<XWPFRun> runs, TextSegement found,
        String find, String replValue) {
    int beginRun = found.getBeginRun();
    int endRun = found.getEndRun();
    StringBuilder sb = new StringBuilder();
    for (int runPos = beginRun; runPos <= endRun; runPos++) {
        // Index 0 is the run's first text element. getTextPosition() is the
        // vertical offset of the text, not a text index.
        String text = runs.get(runPos).getText(0);
        if (text != null) {
            sb.append(text);
        }
    }
    String connectedRuns = sb.toString();
    String replaced = connectedRuns.replace(find, replValue);
    if (replaced.equals(connectedRuns)) {
        return false;
    }
    runs.get(beginRun).setText(replaced, 0);
    // Empty the following runs; only the first run keeps text (and style).
    for (int runPos = beginRun + 1; runPos <= endRun; runPos++) {
        runs.get(runPos).setText("", 0);
    }
    return true;
}
表格主要是通过复制模版行,然后对模版行中的内容做修改
复制文本标签RUN
/**
 * Copies the run properties and the text of {@code source} onto {@code target}.
 *
 * @param target run that receives the formatting and text
 * @param source run to copy from
 */
private void CopyRun(XWPFRun target, XWPFRun source) {
    final String sourceText = source.text();
    // Formatting first, then the text itself.
    target.getCTR().setRPr(source.getCTR().getRPr());
    target.setText(sourceText);
}
XWPFParagraph
/**
 * Makes {@code target} a copy of {@code source}: paragraph properties are
 * copied, all existing runs of the target are removed and one new run is
 * created per source run.
 *
 * @param target paragraph to overwrite
 * @param source paragraph to copy from
 */
private void copyParagraph(XWPFParagraph target, XWPFParagraph source) {
    // Paragraph-level style.
    target.getCTP().setPPr(source.getCTP().getPPr());
    // Remove from the end: a forward loop skips every other run because the
    // list shrinks while the index grows.
    for (int pos = target.getRuns().size() - 1; pos >= 0; pos--) {
        target.removeRun(pos);
    }
    for (XWPFRun s : source.getRuns()) {
        XWPFRun targetrun = target.createRun();
        CopyRun(targetrun, s);
    }
}
XWPFTableCell
/**
 * Makes {@code target} a copy of {@code source}: cell properties are copied,
 * all existing paragraphs of the target are removed and every source
 * paragraph is re-created in the target.
 *
 * @param target cell to overwrite
 * @param source cell to copy from
 */
private void copyTableCell(XWPFTableCell target, XWPFTableCell source) {
    // Cell properties.
    target.getCTTc().setTcPr(source.getCTTc().getTcPr());
    // Remove from the end: a forward loop leaves every other paragraph
    // behind because the list shrinks while the index grows.
    for (int pos = target.getParagraphs().size() - 1; pos >= 0; pos--) {
        target.removeParagraph(pos);
    }
    // Re-create the source paragraphs.
    for (XWPFParagraph sp : source.getParagraphs()) {
        XWPFParagraph targetP = target.addParagraph();
        copyParagraph(targetP, sp);
    }
}
XWPFTableRow
/**
 * Copies the row properties and all cells of {@code source} into {@code target}.
 *
 * The loop is driven by the source row. Iterating the target's cells threw a
 * NullPointerException when the source had fewer cells and silently dropped
 * cells when it had more. Missing target cells are created on demand; surplus
 * target cells are left untouched.
 *
 * @param target row to overwrite
 * @param source template row to copy from
 */
private void CopytTableRow(XWPFTableRow target, XWPFTableRow source) {
    // Row-level style.
    target.getCtRow().setTrPr(source.getCtRow().getTrPr());
    List<XWPFTableCell> sourceCells = source.getTableCells();
    for (int i = 0; i < sourceCells.size(); i++) {
        XWPFTableCell targetCell = target.getCell(i);
        if (targetCell == null) {
            targetCell = target.createCell();
        }
        copyTableCell(targetCell, sourceCells.get(i));
    }
}
完整代码
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.poi.xwpf.usermodel.PositionInParagraph;
import org.apache.poi.xwpf.usermodel.TextSegement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;

25 public class WordAnalysis {
26
27 private final String defaultRegex = "\\$\\{[^{}]+\\}";
28 private String tempFile;
29 private String saveFile;
30
31 @SuppressWarnings("resource")
32 private void CopyFile() throws IOException {
33 File tFile = new File(saveFile);
34 tFile.deleteOnExit();
35 if (!tFile.getParentFile().exists()) {
36 // 目标文件所在目录不存在
37 tFile.getParentFile().mkdirs();
38 }
39 FileInputStream inStream = new FileInputStream(tempFile);
40 FileOutputStream outStream = new FileOutputStream(tFile);
41 FileChannel inC = inStream.getChannel();
42 FileChannel outC = outStream.getChannel();
43 int length = 2097152;
44 while (true) {
45 if (inC.position() == inC.size()) {
46 inC.close();
47 outC.close();
48 tFile = null;
49 inC = null;
50 outC = null;
51 break;
52 }
53 if ((inC.size() - inC.position()) < 20971520)
54 length = (int) (inC.size() - inC.position());
55 else
56 length = 20971520;
57 inC.transferTo(inC.position(), length, outC);
58 inC.position(inC.position() + length);
59 }
60
61 };
62
63 public WordAnalysis(String tempFile) {
64 this.tempFile = tempFile;
65 this.saveFile = tempFile;
66 }
67
68 public WordAnalysis(String tempFile, String saveFile) {
69 this.tempFile = tempFile;
70 this.saveFile = saveFile;
71 // 复制模版文件到输出文件
72 try {
73 CopyFile();
74 } catch (IOException e) {
75 e.printStackTrace();
76 }
77 }
78
79 // 打开文档
80 // 采用流的方式可以打开保存在统一个文集
81 // opcpackage 必须保存为另外一个文件
82 public XWPFDocument openDocument() throws IOException {
83 XWPFDocument xdoc = null;
84 InputStream is = null;
85 is = new FileInputStream(saveFile);
86 xdoc = new XWPFDocument(is);
87 return xdoc;
88 }
89
90 // 关闭文档
91 public void closeDocument(XWPFDocument document) {
92 try {
93 document.close();
94 } catch (IOException e) {
95 e.printStackTrace();
96 }
97 }
98
99 // 保存文档
100 public void saveDocument(XWPFDocument document) {
101 OutputStream os;
102 try {
103 os = new FileOutputStream(saveFile);
104 if (os != null) {
105 document.write(os);
106 os.close();
107 }
108 closeDocument(document);
109 } catch (FileNotFoundException e) {
110 e.printStackTrace();
111 } catch (IOException e) {
112 e.printStackTrace();
113 }
114 }
115
116 // 复制Run
117 private void CopyRun(XWPFRun target, XWPFRun source) {
118 target.getCTR().setRPr(source.getCTR().getRPr());
119 // 设置文本
120 target.setText(source.text());
121 }
122
123 // 复制段落
124 private void copyParagraph(XWPFParagraph target, XWPFParagraph source) {
125 // 设置段落样式
126 target.getCTP().setPPr(source.getCTP().getPPr());
127 // 添加Run标签
128 for (int pos = 0; pos < target.getRuns().size(); pos++) {
129 target.removeRun(pos);
130 }
131 for (XWPFRun s : source.getRuns()) {
132 XWPFRun targetrun = target.createRun();
133 CopyRun(targetrun, s);
134 }
135 }
136
137 // 复制单元格
138 private void copyTableCell(XWPFTableCell target, XWPFTableCell source) {
139 // 列属性
140 target.getCTTc().setTcPr(source.getCTTc().getTcPr());
141 // 删除目标 targetCell 所有单元格
142 for (int pos = 0; pos < target.getParagraphs().size(); pos++) {
143 target.removeParagraph(pos);
144 }
145 // 添加段落
146 for (XWPFParagraph sp : source.getParagraphs()) {
147 XWPFParagraph targetP = target.addParagraph();
148 copyParagraph(targetP, sp);
149 }
150 }
151
152 // 复制行
153 private void CopytTableRow(XWPFTableRow target, XWPFTableRow source) {
154 // 复制样式
155 target.getCtRow().setTrPr(source.getCtRow().getTrPr());
156 // 复制单元格
157 for (int i = 0; i < target.getTableCells().size(); i++) {
158 copyTableCell(target.getCell(i), source.getCell(i));
159 }
160 }
161
162 // 获取表格中所有段落
163 public List<XWPFParagraph> getTableParagraph(XWPFTable table) {
164 List<XWPFParagraph> paras = new ArrayList<XWPFParagraph>();
165 List<XWPFTableRow> rows = table.getRows();
166 for (XWPFTableRow row : rows) {
167 for (XWPFTableCell cell : row.getTableCells()) {
168 for (XWPFParagraph p : cell.getParagraphs()) {
169 // 去掉空白字符串
170 if (p.getText() != null && p.getText().length() > 0) {
171 paras.add(p);
172 }
173 }
174 }
175 }
176 return paras;
177 }
178
179 // 返回为空 表示是普通表格,否则是个列表
180 private String getTableListName(XWPFTable table, String regex) {
181 if (regex == "")
182 regex = defaultRegex;
183 String tableName = "";
184 XWPFTableRow firstRow = table.getRow(0);
185 XWPFTableCell firstCell = firstRow.getCell(0);
186 String cellText = firstCell.getText();
187 Pattern pattern = Pattern.compile(regex);
188 Matcher matcher = pattern.matcher(cellText);
189 boolean find = matcher.find();
190 while (find) {
191 tableName = matcher.group();
192 // 跳出循环
193 find = false;
194 }
195 firstRow = null;
196 firstCell = null;
197 pattern = null;
198 matcher = null;
199 cellText = null;
200 return tableName;
201
202 }
203
204 // 获取文档中所有的表格,不包含嵌套表格
205 // listTable false 返回普通表格, true 返回列表表格
206 public List<XWPFTable> getDocTables(XWPFDocument doc, boolean listTable,
207 String regex) {
208 List<XWPFTable> lstTables = new ArrayList<XWPFTable>();
209 for (XWPFTable table : doc.getTables()) {
210 String tbName = getTableListName(table, regex);
211 if (listTable && tbName != "") {
212 lstTables.add(table);
213 }
214 if (!listTable && (tbName == null || tbName.length() <= 0)) {
215 lstTables.add(table);
216 }
217 }
218 return lstTables;
219 }
220
221 // 向 taglist中添加新解析的段落信息
222 private void setTagInfoList(List<TagInfo> list, XWPFParagraph p,
223 String regex) {
224 if (regex == "")
225 regex = defaultRegex;
226 Pattern pattern = Pattern.compile(regex);
227 Matcher matcher = pattern.matcher(p.getText());
228 int startPosition = 0;
229 while (matcher.find(startPosition)) {
230 String match = matcher.group();
231 if (!list.contains(new TagInfo(match, ""))) {
232 list.add(new TagInfo(match, ""));
233 }
234 startPosition = matcher.end();
235 }
236 }
237
238 // 获取段落集合中所有文本
239 public List<TagInfo> getWordTag(XWPFDocument doc, String regex) {
240 List<TagInfo> tags = new ArrayList<TagInfo>();
241 // 普通段落
242 List<XWPFParagraph> pars = doc.getParagraphs();
243 for (int i = 0; i < pars.size(); i++) {
244 XWPFParagraph p = pars.get(i);
245 setTagInfoList(tags, p, regex);
246 }
247 // Table中段落
248 List<XWPFTable> commTables = getDocTables(doc, false, regex);
249 for (XWPFTable table : commTables) {
250 List<XWPFParagraph> tparags = getTableParagraph(table);
251 for (int i = 0; i < tparags.size(); i++) {
252 XWPFParagraph p = tparags.get(i);
253 setTagInfoList(tags, p, regex);
254 }
255 }
256 return tags;
257 }
258
259 // 获取Table列表中的配置信息
260 public Map<String, List<List<TagInfo>>> getTableTag(XWPFDocument doc,
261 String regex) {
262 Map<String, List<List<TagInfo>>> mapList = new HashMap<String, List<List<TagInfo>>>();
263 List<XWPFTable> lstTables = getDocTables(doc, true, regex);
264 for (XWPFTable table : lstTables) {
265 // 获取每个表格第一个单元格,以及最后一行
266 String strTableName = getTableListName(table, regex);
267 List<List<TagInfo>> list = new ArrayList<List<TagInfo>>();
268 List<TagInfo> lstTag = new ArrayList<TagInfo>();
269 int rowSize = table.getRows().size();
270 XWPFTableRow lastRow = table.getRow(rowSize - 1);
271 for (XWPFTableCell cell : lastRow.getTableCells()) {
272 for (XWPFParagraph p : cell.getParagraphs()) {
273 // 去掉空白字符串
274 if (p.getText() != null && p.getText().length() > 0) {
275 setTagInfoList(lstTag, p, regex);
276 }
277 }
278 }
279 list.add(lstTag);
280 // 添加到数据集
281 mapList.put(strTableName, list);
282 }
283 return mapList;
284 }
285
286 // 替换文本 已处理跨行的情况
287 // 注意 文档中 不能出现类似$${\w+}的字符,由于searchText会一个字符一个字符做比价,找到第一个比配的开始计数
288 public void ReplaceInParagraph(List<TagInfo> tagList, XWPFParagraph para,
289 String regex) {
290 if (regex == "")
291 regex = defaultRegex;
292 List<XWPFRun> runs = para.getRuns();
293 for (TagInfo ti : tagList) {
294 String find = ti.TagText;
295 String replValue = ti.TagValue;
296 TextSegement found = para.searchText(find,
297 new PositionInParagraph());
298 if (found != null) {
299 // 判断查找内容是否在同一个Run标签中
300 if (found.getBeginRun() == found.getEndRun()) {
301 XWPFRun run = runs.get(found.getBeginRun());
302 String runText = run.getText(run.getTextPosition());
303 String replaced = runText.replace(find, replValue);
304 run.setText(replaced, 0);
305 } else {
306 // 存在多个Run标签
307 StringBuilder sb = new StringBuilder();
308 for (int runPos = found.getBeginRun(); runPos <= found
309 .getEndRun(); runPos++) {
310 XWPFRun run = runs.get(runPos);
311 sb.append(run.getText((run.getTextPosition())));
312 }
313 String connectedRuns = sb.toString();
314 String replaced = connectedRuns.replace(find, replValue);
315 XWPFRun firstRun = runs.get(found.getBeginRun());
316 firstRun.setText(replaced, 0);
317 // 删除后边的run标签
318 for (int runPos = found.getBeginRun() + 1; runPos <= found
319 .getEndRun(); runPos++) {
320 // 清空其他标签内容
321 XWPFRun partNext = runs.get(runPos);
322 partNext.setText("", 0);
323 }
324 }
325 }
326 }
327 // 完成第一遍查找,检测段落中的标签是否已经替换完 TODO 2016-06-14忘记当时处于什么考虑 加入这段代码
328 // Pattern pattern = Pattern.compile(regex);
329 // Matcher matcher = pattern.matcher(para.getText());
330 // boolean find = matcher.find();
331 // if (find) {
332 // ReplaceInParagraph(tagList, para, regex);
333 // find = false;
334 // }
335 }
336
337 // 替换列表数据
338 public void ReplaceInTable(List<List<TagInfo>> tagList, XWPFTable table,
339 String regex) {
340 int tempRowIndex = table.getRows().size() - 1;
341 XWPFTableRow tempRow = table.getRow(tempRowIndex);
342 for (List<TagInfo> lst : tagList) {
343 table.createRow();
344 XWPFTableRow newRow = table.getRow(table.getRows().size() - 1);
345 CopytTableRow(newRow, tempRow);
346 List<XWPFTableCell> nCells = newRow.getTableCells();
347 for (int i = 0; i < nCells.size(); i++) {
348 XWPFTableCell cell = newRow.getCell(i);
349 for (XWPFParagraph p : cell.getParagraphs()) {
350 if (p.getText() != null && p.getText().length() > 0) {
351 ReplaceInParagraph(lst, p, regex);
352 }
353 }
354 }
355 }
356 // 删除模版行
357 table.removeRow(tempRowIndex);
358 }
359
360 // 替换所有tag
361 public void ReplaceAllTag(XWPFDocument doc, List<TagInfo> formTagList,
362 Map<String, List<List<TagInfo>>> tableTagList, String regex) {
363 // 替换普通段落
364 for (XWPFParagraph p : doc.getParagraphs()) {
365 ReplaceInParagraph(formTagList, p, regex);
366 }
367 // 替换普通表格中段落
368 List<XWPFTable> listCommTable = getDocTables(doc, false, regex);
369 for (XWPFTable t : listCommTable) {
370 List<XWPFParagraph> lstable = getTableParagraph(t);
371 for (XWPFParagraph pt : lstable) {
372 ReplaceInParagraph(formTagList, pt, regex);
373 }
374 }
375 List<XWPFTable> listTable = getDocTables(doc, true, regex);
376 for (XWPFTable table : listTable) {
377 String tableName = getTableListName(table, regex);
378 List<TagInfo> tableNameTags = new ArrayList<TagInfo>();
379 tableNameTags.add(new TagInfo(tableName, ""));
380 XWPFTableCell firstCell = table.getRow(0).getCell(0);
381 List<XWPFParagraph> cellParas = firstCell.getParagraphs();
382 for (XWPFParagraph pt : cellParas) {
383 ReplaceInParagraph(tableNameTags, pt, regex);
384 }
385 List<List<TagInfo>> targetTableList = tableTagList.get(tableName);
386 ReplaceInTable(targetTableList, table, regex);
387 }
388 }
389 }