将office文件转化为html格式或者pdf格式

在转换之前,需要先启动openOffice的服务:在openOffice安装目录下打开命令窗口,执行 soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard 即可启动(注意命令中的引号必须是英文半角引号)。
不知道如何启动的,可以参照我的另外一篇文章。

我用的maven工程,先要引入依赖,加到pom.xml中

<!-- https://mvnrepository.com/artifact/com.artofsolving/jodconverter -->
<dependency>
<groupId>com.artofsolving</groupId>
<artifactId>jodconverter</artifactId>
<version>2.2.1</version>
</dependency>

在D盘的poi-test目录下面放入待转换的office文件(XLS.xls、test.doc、PPT.ppt、DOCX.docx),并创建openOffice文件夹(其下再建xls、doc、ppt、docx四个子目录),用来存放转换结果

基于openOffice和java实现office转pdf和html示例代码_开发语言

话不多说,直接上代码。

package ssm.util;


import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.ConnectException;
import java.text.SimpleDateFormat;
import java.util.Date;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
/**
 * Converts office documents (*.doc, *.docx, *.xls, *.ppt) to HTML or PDF with
 * jodconverter, which delegates the work to a running OpenOffice service.
 * Make sure the OpenOffice service is started before use; its process name is
 * soffice.exe | soffice.bin.
 *
 * @author yjclsx
 */
public class Doc2HtmlUtil {

    /** Port on which the OpenOffice service is contacted. */
    private static final int OPEN_OFFICE_PORT = 8100;

    /** Timestamp embedded in generated file names; note the one-second resolution. */
    private static final String TIMESTAMP_PATTERN = "yyyyMMddHHmmss";

    private static Doc2HtmlUtil doc2HtmlUtil;

    /**
     * Returns the shared Doc2HtmlUtil instance, creating it on first use.
     * The method is synchronized so that only one instance is ever created.
     */
    public static synchronized Doc2HtmlUtil getDoc2HtmlUtilInstance() {
        if (doc2HtmlUtil == null) {
            doc2HtmlUtil = new Doc2HtmlUtil();
        }
        return doc2HtmlUtil;
    }

    /**
     * Converts an office document to HTML.
     *
     * @param fromFileInputStream content of the source document; it is fully read and
     *        closed by this method unless {@code type} is unsupported
     * @param toFilePath directory that receives the temporary copy of the source and the
     *        converted file
     * @param type source format: {@code doc}, {@code docx}, {@code xls} or {@code ppt}
     * @return the name (not the full path) of the generated {@code .html} file inside
     *         {@code toFilePath}, or {@code null} if {@code type} is not supported
     * @throws IOException if the source cannot be copied to {@code toFilePath} or the
     *         OpenOffice service cannot be reached
     */
    public String file2Html(InputStream fromFileInputStream, String toFilePath, String type) throws IOException {
        return convert(fromFileInputStream, toFilePath, type, "html");
    }

    /**
     * Converts an office document to PDF.
     *
     * @param fromFileInputStream content of the source document; it is fully read and
     *        closed by this method unless {@code type} is unsupported
     * @param toFilePath directory that receives the temporary copy of the source and the
     *        converted file
     * @param type source format: {@code doc}, {@code docx}, {@code xls} or {@code ppt}
     * @return the name (not the full path) of the generated {@code .pdf} file inside
     *         {@code toFilePath}, or {@code null} if {@code type} is not supported
     * @throws IOException if the source cannot be copied to {@code toFilePath} or the
     *         OpenOffice service cannot be reached
     */
    public String file2pdf(InputStream fromFileInputStream, String toFilePath, String type) throws IOException {
        return convert(fromFileInputStream, toFilePath, type, "pdf");
    }

    /**
     * Shared implementation of both conversions: stores the stream in a temporary file
     * named {@code <type>_<timestamp>.<type>}, converts it to
     * {@code <type>_<timestamp>.<targetExtension>} and removes the temporary file again.
     */
    private String convert(InputStream fromFileInputStream, String toFilePath, String type,
            String targetExtension) throws IOException {
        if (!isSupportedType(type)) {
            // The stream is deliberately left untouched so the caller can still use it.
            return null;
        }
        if (fromFileInputStream == null) {
            throw new NullPointerException("fromFileInputStream");
        }

        String baseName = type + "_" + new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date());
        String outputFileName = baseName + "." + targetExtension;
        File outputFile = new File(toFilePath + File.separatorChar + outputFileName);
        File inputFile = new File(toFilePath + File.separatorChar + baseName + "." + type);

        boolean converted = false;
        try {
            // The caller's stream is closed here whether or not the copy succeeds.
            try (InputStream source = fromFileInputStream) {
                resetFile(outputFile);
                copyToFile(source, inputFile);
            }
            convertWithOpenOffice(inputFile, outputFile);
            converted = true;
            return outputFileName;
        } finally {
            // The temporary copy of the source is never needed after the conversion attempt.
            inputFile.delete();
            if (!converted) {
                // Do not leave an empty or partial result behind when something failed.
                outputFile.delete();
            }
        }
    }

    /** Tells whether the given source format is one this class accepts. */
    private static boolean isSupportedType(String type) {
        return "doc".equals(type) || "docx".equals(type) || "xls".equals(type) || "ppt".equals(type);
    }

    /** Replaces any existing file at the given location with a new empty one. */
    private static void resetFile(File file) throws IOException {
        if (file.exists()) {
            file.delete();
        }
        file.createNewFile();
    }

    /** Writes everything readable from the stream into the target file, overwriting it. */
    private static void copyToFile(InputStream source, File target) throws IOException {
        try (OutputStream os = new FileOutputStream(target)) {
            byte[] buffer = new byte[1024 * 8];
            int bytesRead;
            while ((bytesRead = source.read(buffer)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
        }
    }

    /**
     * Runs one conversion over a fresh connection to the OpenOffice service and always
     * disconnects afterwards. Runtime exceptions thrown by the converter propagate unchanged.
     */
    private static void convertWithOpenOffice(File inputFile, File outputFile) throws ConnectException {
        OpenOfficeConnection connection = new SocketOpenOfficeConnection(OPEN_OFFICE_PORT);
        try {
            connection.connect();
        } catch (ConnectException e) {
            // Fail here with a clear message instead of converting over a dead connection.
            ConnectException failure = new ConnectException("文件转换出错,请检查OpenOffice服务是否启动。");
            failure.initCause(e);
            throw failure;
        }
        try {
            DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
            converter.convert(inputFile, outputFile);
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Demo entry point: converts the sample files under D:/poi-test to HTML.
     * Each input stream is consumed and closed by the conversion, so to produce a PDF
     * swap a file2Html call for the commented file2pdf call instead of running both
     * on the same stream.
     */
    public static void main(String[] args) throws IOException {
        Doc2HtmlUtil util = getDoc2HtmlUtilInstance();

        util.file2Html(new FileInputStream("D:/poi-test/XLS.xls"), "D:/poi-test/openOffice/xls", "xls");
        // util.file2pdf(new FileInputStream("D:/poi-test/XLS.xls"), "D:/poi-test/openOffice/xls", "xls");

        util.file2Html(new FileInputStream("D:/poi-test/test.doc"), "D:/poi-test/openOffice/doc", "doc");
        // util.file2pdf(new FileInputStream("D:/poi-test/test.doc"), "D:/poi-test/openOffice/doc", "doc");

        util.file2Html(new FileInputStream("D:/poi-test/PPT.ppt"), "D:/poi-test/openOffice/ppt", "ppt");
        // util.file2pdf(new FileInputStream("D:/poi-test/PPT.ppt"), "D:/poi-test/openOffice/ppt", "ppt");

        // docx conversion is left disabled in this demo; the stream is only opened when a call is enabled.
        // util.file2Html(new FileInputStream("D:/poi-test/DOCX.docx"), "D:/poi-test/openOffice/docx", "docx");
        // util.file2pdf(new FileInputStream("D:/poi-test/DOCX.docx"), "D:/poi-test/openOffice/docx", "docx");
    }

}

执行代码,命令行没有报错

基于openOffice和java实现office转pdf和html示例代码_html_02


查看转换出来的文件:doc转的html

基于openOffice和java实现office转pdf和html示例代码_开发语言_03


ppt转的html

基于openOffice和java实现office转pdf和html示例代码_java_04


xls转的html

基于openOffice和java实现office转pdf和html示例代码_开发语言_05


转换pdf只要把上面main方法中注释掉的file2pdf调用打开即可(同一个输入流转换一次后就会被关闭,所以要同时把对应的file2Html调用注释掉),测试过是可以转换成pdf的。

docx的暂时不能转换(本文用的是jodconverter 2.2.1),我查了一下,据说换成jodconverter的2.2.2版本就可以了。

大家还有什么问题可以私信于我。