Java 读取 Word 文档(doc / docx)中的内容
一、效果:
1.1 doc文本
1.2 程序获取的内容
二、源码
2.1 pom文件
<!-- Apache POI dependencies. All three artifacts are pinned to the same version (3.16); keep them in sync when upgrading. -->
<!-- poi: the core Apache POI library. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16</version>
</dependency>
<!-- poi-scratchpad: per the Apache POI component map, ships the HWPF classes for the legacy binary .doc format. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.16</version>
</dependency>
<!-- poi-ooxml: per the Apache POI component map, ships the XWPF classes for the OOXML .docx format. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16</version>
</dependency>
2.2 Java代码
/**
 * Demonstrates extracting the plain text of Word documents with Apache POI.
 *
 * <p>{@link #readWordDoc(String)} handles the legacy binary {@code .doc} format and
 * {@link #readWordDocX(String)} handles the OOXML {@code .docx} format. Both methods
 * report failures by printing the stack trace and returning {@code null}.
 */
public class TestReadDoc {

    /** Sample document read by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_DOC = "C:\\Users\\canwei\\Desktop\\test.doc";

    /** File-name suffix that selects the OOXML reader in {@link #main(String[])}. */
    private static final String DOCX_SUFFIX = ".docx";

    /**
     * Reads the text of a Word document stored in the binary {@code .doc} format.
     *
     * @param filePath path of the {@code .doc} file to read
     * @return the extracted text, or {@code null} if the file could not be opened or parsed
     */
    public static String readWordDoc(String filePath) {
        // try-with-resources releases the file handle and the extractor on every path,
        // including when parsing fails half-way through.
        try (InputStream input = new FileInputStream(new File(filePath));
             WordExtractor wex = new WordExtractor(input)) {
            return wex.getText();
        } catch (Exception e) {
            // Callers rely on the null-on-failure contract, so report and fall through.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads the text of a Word document stored in the OOXML {@code .docx} format.
     *
     * @param filePath path of the {@code .docx} file to read
     * @return the extracted text, or {@code null} if the file could not be opened or parsed
     */
    public static String readWordDocX(String filePath) {
        String content = null;
        OPCPackage opcPackage = null;
        try {
            opcPackage = POIXMLDocument.openPackage(filePath);
            XWPFDocument xwpf = new XWPFDocument(opcPackage);
            POIXMLTextExtractor poiText = new XWPFWordExtractor(xwpf);
            content = poiText.getText();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (opcPackage != null) {
                // revert() releases the underlying file WITHOUT saving. close() must not be
                // used here: the package is opened read-write, so close() would rewrite the
                // source document.
                opcPackage.revert();
            }
        }
        return content;
    }

    /**
     * Prints the text of a Word document to standard output.
     *
     * @param args optional; {@code args[0]} is the document path. A path ending in
     *             {@code .docx} (case-insensitive) is read with {@link #readWordDocX(String)},
     *             anything else with {@link #readWordDoc(String)}. With no arguments the
     *             built-in sample path is used.
     */
    public static void main(String[] args) {
        System.out.println("****** 下面就是获取的内容 ******");
        String doc = (args != null && args.length > 0) ? args[0] : DEFAULT_DOC;
        // regionMatches(ignoreCase=true, ...) is locale-independent and simply returns false
        // when the path is shorter than the suffix (negative offset).
        boolean isDocx = doc.regionMatches(
                true, doc.length() - DOCX_SUFFIX.length(), DOCX_SUFFIX, 0, DOCX_SUFFIX.length());
        System.out.println(isDocx ? readWordDocX(doc) : readWordDoc(doc));
    }
}