读取word中的内容
/**
* 利用POI实现从word中读取内容
*/============================================
将读取的内容写入另外一个文件中
package com.cy;
/**
* WordReader类中readDoc的作用为从word中将数据读出
*/import com.cy.WordWriter;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;public class WordWriter {
public static boolean writeDoc(String path, String content) {
boolean w = false;
try { // byte b[] = content.getBytes("ISO-8859-1");
byte b[] = content.getBytes(); ByteArrayInputStream bais = new ByteArrayInputStream(b);
POIFSFileSystem fs = new POIFSFileSystem();
DirectoryEntry directory = fs.getRoot(); DocumentEntry de = directory.createDocument("WordDocument", bais);
FileOutputStream ostream = new FileOutputStream(path);
fs.writeFilesystem(ostream);
bais.close();
ostream.close(); } catch (IOException e) {
e.printStackTrace();
}
return w;
}
public static void main(String[] args) throws Exception{
String wr=WordReader.readDoc("D://test.doc");
boolean b = writeDoc("D://result.doc",wr);
}
}
//目前该程序只能实现对简单的文字的操作,无法实现对表格样式的操作,继续改进,请关注!!
import java.io.File;
import java.io.FileInputStream;import org.apache.poi.hwpf.extractor.WordExtractor;
public class WordReader {
public static String readDoc(String doc) throws Exception { // 创建输入流读取DOC文件
FileInputStream in = new FileInputStream(new File(doc));
WordExtractor extractor = null;
String text = null;
// 创建WordExtractor
extractor = new WordExtractor(in);
// 对DOC文件进行提取
text = extractor.getText();
return text;
}
public static void main(String[] args) { try{
String text = WordReader.readDoc("d://test.doc");
System.out.println(text);
}catch(Exception e){
e.printStackTrace();
} }
}
摘自:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class ExportDocImpl {
public void testWord(){
try{
FileInputStream in = new FileInputStream("D:\\sinye.doc");//载入文档
POIFSFileSystem pfs = new POIFSFileSystem(in);
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();//得到文档的读取范围
TableIterator it = new TableIterator(range);
//迭代文档中的表格
while (it.hasNext()) {
Table tb = (Table) it.next();
//迭代行,默认从0开始
for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
//迭代列,默认从0开始
for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);//取得单元格
//取得单元格的内容
for(int k=0;k<td.numParagraphs();k++){
Paragraph para =td.getParagraph(k);
String s = para.text();
System.out.println(s);
} //end for
} //end for
} //end for
} //end while
}catch(Exception e){
e.printStackTrace();
}
}//end method
public void testWord1(){
try {
//word 2003: 图片不会被读取
InputStream is = new FileInputStream(new File("D:\\sinye.doc"));
WordExtractor ex = new WordExtractor(is);
String text2003 = ex.getText();
System.out.println(text2003);
//word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\sinye.doc");
POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
String text2007 = extractor.getText();
System.out.println(text2007);
} catch (Exception e) {
e.printStackTrace();
}
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class ExportDocImpl
{
public void testWord(){
try{
FileInputStream in = new FileInputStream("D:\\sinye.doc");//载入文档
POIFSFileSystem pfs = new POIFSFileSystem(in);
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();//得到文档的读取范围
TableIterator it = new TableIterator(range);
//迭代文档中的表格
while (it.hasNext()) {
Table tb = (Table) it.next();
//迭代行,默认从0开始
for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
//迭代列,默认从0开始
for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);//取得单元格
//取得单元格的内容
for(int k=0;k<td.numParagraphs();k++){
Paragraph para =td.getParagraph(k);
String s = para.text();
System.out.println(s);
} //end for
} //end for
} //end for
} //end while
}catch(Exception e){
e.printStackTrace();
}
}//end method
public void testWord1(){
try {
//word 2003: 图片不会被读取
InputStream is = new FileInputStream(new File("D:\\sinye.doc"));
WordExtractor ex = new WordExtractor(is);
String text2003 = ex.getText();
System.out.println(text2003);
//word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\sinye.doc");
POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
String text2007 = extractor.getText();
System.out.println(text2007);
} catch (Exception e) {
e.printStackTrace();
}
}
}
/**
* 利用POI实现从word中读取内容
*/============================================
将读取的内容写入另外一个文件中
package com.cy;
/**
* WordReader类中readDoc的作用为从word中将数据读出
*/import com.cy.WordWriter;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.IOException;import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;public class WordWriter {
public static boolean writeDoc(String path, String content) {
boolean w = false;
try { // byte b[] = content.getBytes("ISO-8859-1");
byte b[] = content.getBytes(); ByteArrayInputStream bais = new ByteArrayInputStream(b);
POIFSFileSystem fs = new POIFSFileSystem();
DirectoryEntry directory = fs.getRoot(); DocumentEntry de = directory.createDocument("WordDocument", bais);
FileOutputStream ostream = new FileOutputStream(path);
fs.writeFilesystem(ostream);
bais.close();
ostream.close(); } catch (IOException e) {
e.printStackTrace();
}
return w;
}
public static void main(String[] args) throws Exception{
String wr=WordReader.readDoc("D://test.doc");
boolean b = writeDoc("D://result.doc",wr);
}
}
//目前该程序只能实现对简单的文字的操作,无法实现对表格样式的操作,继续改进,请关注!!
import java.io.File;
import java.io.FileInputStream;import org.apache.poi.hwpf.extractor.WordExtractor;
public class WordReader {
public static String readDoc(String doc) throws Exception { // 创建输入流读取DOC文件
FileInputStream in = new FileInputStream(new File(doc));
WordExtractor extractor = null;
String text = null;
// 创建WordExtractor
extractor = new WordExtractor(in);
// 对DOC文件进行提取
text = extractor.getText();
return text;
}
public static void main(String[] args) { try{
String text = WordReader.readDoc("d://test.doc");
System.out.println(text);
}catch(Exception e){
e.printStackTrace();
} }
}
摘自:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class ExportDocImpl {
public void testWord(){
try{
FileInputStream in = new FileInputStream("D:\\sinye.doc");//载入文档
POIFSFileSystem pfs = new POIFSFileSystem(in);
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();//得到文档的读取范围
TableIterator it = new TableIterator(range);
//迭代文档中的表格
while (it.hasNext()) {
Table tb = (Table) it.next();
//迭代行,默认从0开始
for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
//迭代列,默认从0开始
for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);//取得单元格
//取得单元格的内容
for(int k=0;k<td.numParagraphs();k++){
Paragraph para =td.getParagraph(k);
String s = para.text();
System.out.println(s);
} //end for
} //end for
} //end for
} //end while
}catch(Exception e){
e.printStackTrace();
}
}//end method
public void testWord1(){
try {
//word 2003: 图片不会被读取
InputStream is = new FileInputStream(new File("D:\\sinye.doc"));
WordExtractor ex = new WordExtractor(is);
String text2003 = ex.getText();
System.out.println(text2003);
//word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\sinye.doc");
POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
String text2007 = extractor.getText();
System.out.println(text2007);
} catch (Exception e) {
e.printStackTrace();
}
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class ExportDocImpl
{
public void testWord(){
try{
FileInputStream in = new FileInputStream("D:\\sinye.doc");//载入文档
POIFSFileSystem pfs = new POIFSFileSystem(in);
HWPFDocument hwpf = new HWPFDocument(pfs);
Range range = hwpf.getRange();//得到文档的读取范围
TableIterator it = new TableIterator(range);
//迭代文档中的表格
while (it.hasNext()) {
Table tb = (Table) it.next();
//迭代行,默认从0开始
for (int i = 0; i < tb.numRows(); i++) {
TableRow tr = tb.getRow(i);
//迭代列,默认从0开始
for (int j = 0; j < tr.numCells(); j++) {
TableCell td = tr.getCell(j);//取得单元格
//取得单元格的内容
for(int k=0;k<td.numParagraphs();k++){
Paragraph para =td.getParagraph(k);
String s = para.text();
System.out.println(s);
} //end for
} //end for
} //end for
} //end while
}catch(Exception e){
e.printStackTrace();
}
}//end method
public void testWord1(){
try {
//word 2003: 图片不会被读取
InputStream is = new FileInputStream(new File("D:\\sinye.doc"));
WordExtractor ex = new WordExtractor(is);
String text2003 = ex.getText();
System.out.println(text2003);
//word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后
OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\sinye.doc");
POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
String text2007 = extractor.getText();
System.out.println(text2007);
} catch (Exception e) {
e.printStackTrace();
}
}
}