通过java实现word转PDF
介绍
用于java项目中解决word转pdf的需求,转换的效果跟调用的工具类、字体库、源文件(是wps还是microsoft保存的,格式版本等)、系统环境等多个因素相关,没有百分百完成的方法,只有不断尝试,选择适合的方法。以下介绍三个能实现该功能的方法。
一、通过Aspose实现
说明:Aspose需要付费购买License才能去掉水印,不过确实比较好用,我这边使用的版本是20.12。感兴趣的同学可以去aspose-words-20.12-jdk17.jar中查看License.class的源码了解验证逻辑。网上也有文章介绍这个逻辑,但大多数都是21年之前的说明了,22年后相同版本的License验证有变动。老版本(18.6版本,当前写文章的时间2022-6-26)的License认证逻辑跟网上的差别不大,但是方法名称是有变动的,不要一味照搬其他文章内容,还是需要多看多思考。
项目实践过程中遇到问题:低于19.11的版本(本人测试过18.6版本的)在保存pdf后,会出现行高自动增加的问题。
用于实现的jar包20.12和18.6的地址:
相关依赖
<repositories>
<repository>
<id>AsposeJavaAPI</id>
<name>Aspose Java API</name>
<url>https://repository.aspose.com/repo/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>20.12</version>
<classifier>jdk17</classifier>
</dependency>
</dependencies>
<dependencies>
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>20.12</version>
<classifier>jdk17</classifier>
</dependency>
</dependencies>
核心代码
/**
 * Converts a Word document to PDF using Aspose.Words.
 * <p>
 * Before saving, the default paragraph formatting is cleared, all tracked
 * revisions are accepted and every comment node is removed from the document.
 * Any failure is logged and swallowed; nothing is rethrown to the caller.
 *
 * @param sourcePath path of the source Word file, e.g. /root/example.doc
 * @param targetPath path of the PDF file to write, e.g. /root/example.pdf
 */
public static void asposeWordToPdf(String sourcePath, String targetPath) {
    LoadOptions opts = new LoadOptions();
    // Optional: pin the Word version used for loading, e.g.
    // opts.setMswVersion(MsWordVersion.WORD_2016)
    opts.getLanguagePreferences().setDefaultEditingLanguage(EditingLanguage.CHINESE_PRC);
    try {
        Document doc = new Document(sourcePath, opts);
        ParagraphFormat pf = doc.getStyles().getDefaultParagraphFormat();
        pf.clearFormatting();

        PdfSaveOptions options = new PdfSaveOptions();
        // Export the document structure, and compress text and images
        options.setExportDocumentStructure(true);
        options.setTextCompression(PdfTextCompression.FLATE);
        options.setImageCompression(PdfImageCompression.AUTO);

        // Accept all tracked revisions
        doc.acceptAllRevisions();

        // Strip comments. The collection returned by getChildNodes is live, so
        // removing nodes while walking it by ascending index skips every other
        // comment; iterate over an array snapshot instead.
        Node[] comments = doc.getChildNodes(NodeType.COMMENT, true).toArray();
        for (Node comment : comments) {
            log.info("清除批注:{}", comment.getText());
            comment.getParentNode().removeChild(comment);
        }

        // Save the Word document as PDF
        doc.save(targetPath, options);
    } catch (Exception e) {
        // Trailing throwable argument keeps the stack trace in the log
        log.error("[aspose] word转pdf失败:{}", e.toString(), e);
    }
}
二、通过docx4j实现
通过docx4j实现
相关依赖
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-JAXB-Internal</artifactId>
<version>8.2.4</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-export-fo</artifactId>
<version>8.2.4</version>
</dependency>
核心代码
/**
 * Converts a Word document to PDF using docx4j.
 * <p>
 * Common Chinese font display names are mapped to physical font names before
 * export. Any failure is logged and swallowed; nothing is rethrown.
 * <p>
 * NOTE(review): docx4j works on OOXML packages, so the source is expected to
 * be a .docx file rather than a legacy binary .doc - confirm against callers.
 * NOTE(review): PhysicalFonts.get presumably returns null for fonts that are
 * not installed on the host - verify how the mapper treats such entries.
 *
 * @param sourcePath path of the source Word file, e.g. /root/example.docx
 * @param targetPath path of the PDF file to write, e.g. /root/example.pdf
 */
public static void docx4jWordToPdf(String sourcePath, String targetPath) {
    // { font name as used in the document, physical font name to look up }
    final String[][] fontNames = {
        {"隶书", "LiSu"},
        {"宋体", "SimSun"},
        {"微软雅黑", "Microsoft Yahei"},
        {"黑体", "SimHei"},
        {"楷体", "KaiTi"},
        {"新宋体", "NSimSun"},
        {"华文行楷", "STXingkai"},
        {"华文仿宋", "STFangsong"},
        {"仿宋", "FangSong"},
        {"幼圆", "YouYuan"},
        {"华文宋体", "STSong"},
        {"华文中宋", "STZhongsong"},
        {"等线", "SimSun"},
        {"等线 Light", "SimSun"},
        {"华文琥珀", "STHupo"},
        {"华文隶书", "STLiti"},
        {"华文新魏", "STXinwei"},
        {"华文彩云", "STCaiyun"},
        {"方正姚体", "FZYaoti"},
        {"方正舒体", "FZShuTi"},
        {"华文细黑", "STXihei"},
        {"宋体扩展", "simsun-extB"},
        {"仿宋_GB2312", "FangSong_GB2312"},
        {"新細明體", "SimSun"},
    };
    try {
        WordprocessingMLPackage pkg = Docx4J.load(new File(sourcePath));

        Mapper fontMapper = new IdentityPlusMapper();
        for (String[] entry : fontNames) {
            fontMapper.put(entry[0], PhysicalFonts.get(entry[1]));
        }
        pkg.setFontMapper(fontMapper);

        // try-with-resources so the target file handle is flushed and released
        // whether or not the export succeeds
        try (FileOutputStream out = new FileOutputStream(targetPath)) {
            Docx4J.toPDF(pkg, out);
        }
    } catch (Exception e) {
        // Trailing throwable argument keeps the stack trace in the log
        log.error("[docx4j] word转pdf失败:{}", e.toString(), e);
    }
}
三、通过documents4j实现
通过documents4j实现
相关依赖
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-local</artifactId>
<version>1.0.3</version>
</dependency>
<dependency>
<groupId>com.documents4j</groupId>
<artifactId>documents4j-transformer-msoffice-word</artifactId>
<version>1.0.3</version>
</dependency>
核心代码
/**
 * Converts a Word document to PDF using documents4j.
 * <p>
 * The input is declared to the converter as DOCX. Both streams are closed and
 * the converter is shut down when the method returns, on success and on
 * failure alike. Any failure is logged and swallowed; nothing is rethrown.
 * <p>
 * NOTE(review): the local documents4j converter delegates to an installed
 * Microsoft Word, so this presumably only works on a Windows host with Office
 * available - confirm for the target environment.
 *
 * @param sourcePath path of the source Word file, e.g. /root/example.docx
 * @param targetPath path of the PDF file to write, e.g. /root/example.pdf
 */
public static void documents4jWordToPdf(String sourcePath, String targetPath) {
    File inputWord = new File(sourcePath);
    File outputFile = new File(targetPath);
    IConverter converter = null;
    try (InputStream docxInputStream = new FileInputStream(inputWord);
         OutputStream outputStream = new FileOutputStream(outputFile)) {
        converter = LocalConverter.builder().build();
        boolean converted = converter.convert(docxInputStream)
                .as(DocumentType.DOCX)
                .to(outputStream)
                .as(DocumentType.PDF)
                .execute();
        if (!converted) {
            // execute() reports failure through its return value, not only by throwing
            log.error("[documents4J] word转pdf失败:{}", "execute() returned false for " + sourcePath);
        }
    } catch (Exception e) {
        // Trailing throwable argument keeps the stack trace in the log
        log.error("[documents4J] word转pdf失败:{}", e.toString(), e);
    } finally {
        // Release the converter's background resources; without this every
        // call leaves a converter instance running
        if (converter != null) {
            converter.shutDown();
        }
    }
}