第一:下载 POI。从 http://jakarta.apache.org/poi/(该项目现已迁移至 https://poi.apache.org/)下载 poi-bin-3.5-beta4-20081128.zip,解压后把其中的 jar 包加入项目工程的构建路径。
第二:处理Word(Word.java)
import org.apache.poi.hwpf.extractor.WordExtractor;
import java.io.File;
import java.io.InputStream;
/**
 * Extracts the plain text of a Word document using Apache POI's
 * {@code WordExtractor}.
 */
public class Word {
    /** Demo entry point: prints the text extracted from a hard-coded sample file. */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c:\\11.doc"));
    }

    /**
     * Extracts the text of the document at the given path.
     *
     * @param s path of the document to read
     * @return the text reported by the extractor
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        InputStream in = new java.io.FileInputStream(s);
        try {
            return getContent(in);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Extracts the text of the given document file.
     *
     * @param f the document to read
     * @return the text reported by the extractor
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream in = new java.io.FileInputStream(f);
        try {
            return getContent(in);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Extracts the text of a document supplied as a stream.
     * The stream is not closed here; it remains owned by the caller.
     *
     * @param is stream positioned at the start of the document
     * @return the text reported by the extractor
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        WordExtractor extractor = new WordExtractor(is);
        return extractor.getText();
    }
}
第三:处理Excel(Excel.java)
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFCell;
import java.io.File;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Extracts cell text from an Excel workbook using Apache POI's HSSF API.
 *
 * <p>String cells and numeric cells are emitted; cells of any other type are
 * skipped. A newline is written before every sheet and before every row, and
 * cell values within a row are concatenated without a separator.
 */
public class Excel {
    /** Demo entry point: prints the text extracted from a hard-coded sample file. */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c:\\22.xls"));
    }

    /**
     * Extracts the text of the workbook at the given path.
     *
     * @param s path of the workbook to read
     * @return the concatenated cell text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        InputStream in = new java.io.FileInputStream(s);
        try {
            return getContent(in);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Extracts the text of the given workbook file.
     *
     * @param f the workbook to read
     * @return the concatenated cell text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream in = new java.io.FileInputStream(f);
        try {
            return getContent(in);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Extracts the text of a workbook supplied as a stream.
     * The stream is not closed here; it remains owned by the caller.
     *
     * <p>Date-formatted numeric cells are rendered as {@code yyyy-MM-dd}; all
     * other numeric cells are rendered via their {@code double} value.
     *
     * @param is stream positioned at the start of the workbook
     * @return the concatenated cell text
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        StringBuffer content = new StringBuffer();
        // Created once per call rather than once per date cell; kept local
        // because SimpleDateFormat must not be shared between threads.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
        HSSFWorkbook workbook = new HSSFWorkbook(is);
        for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
            HSSFSheet aSheet = workbook.getSheetAt(sheetIndex); // fetch one sheet
            content.append("\n");
            if (null == aSheet) {
                continue;
            }
            for (int rowNum = 0; rowNum <= aSheet.getLastRowNum(); rowNum++) {
                content.append("\n");
                HSSFRow aRow = aSheet.getRow(rowNum);
                if (null == aRow) {
                    continue;
                }
                // getLastCellNum() is the last cell index PLUS ONE (or -1 for an
                // empty row), so the bound must be exclusive.
                int cellCount = aRow.getLastCellNum();
                for (int cellNum = 0; cellNum < cellCount; cellNum++) {
                    HSSFCell aCell = aRow.getCell(cellNum);
                    if (null == aCell) {
                        continue;
                    }
                    int cellType = aCell.getCellType();
                    if (cellType == HSSFCell.CELL_TYPE_STRING) {
                        content.append(aCell.getRichStringCellValue().getString());
                    } else if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
                        if (HSSFDateUtil.isCellDateFormatted(aCell)) {
                            Date date = aCell.getDateCellValue();
                            content.append(dateFormat.format(date));
                        } else {
                            // Plain numbers were previously dropped from the output.
                            content.append(aCell.getNumericCellValue());
                        }
                    }
                }
            }
        }
        return content.toString();
    }
}
第四:处理PowerPoint(PowerPoint.java)
import java.io.File;
import java.io.InputStream;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.usermodel.SlideShow;
/**
 * Extracts slide text from a PowerPoint presentation using Apache POI's
 * HSLF API.
 */
public class PowerPoint {
    /** Demo entry point: prints the text extracted from a hard-coded sample file. */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c:\\33.ppt"));
    }

    /**
     * Extracts the text of the presentation at the given path.
     *
     * @param s path of the presentation to read
     * @return the concatenated slide text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        InputStream in = new java.io.FileInputStream(s);
        try {
            return getContent(in);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Extracts the text of the given presentation file.
     *
     * @param f the presentation to read
     * @return the concatenated slide text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream in = new java.io.FileInputStream(f);
        try {
            return getContent(in);
        } finally {
            // This method opened the stream, so it is responsible for closing it.
            in.close();
        }
    }

    /**
     * Extracts the text of a presentation supplied as a stream.
     * The stream is not closed here; it remains owned by the caller.
     *
     * <p>For each slide, the text of every text run is appended, followed by
     * the slide title when the slide has one.
     *
     * @param is stream positioned at the start of the presentation
     * @return the concatenated slide text
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        StringBuffer content = new StringBuffer("");
        SlideShow slideShow = new SlideShow(new HSLFSlideShow(is));
        Slide[] slides = slideShow.getSlides();
        for (int i = 0; i < slides.length; i++) {
            TextRun[] runs = slides[i].getTextRuns();
            for (int j = 0; j < runs.length; j++) {
                content.append(runs[j].getText());
            }
            // A null title would otherwise be appended as the literal text "null".
            String title = slides[i].getTitle();
            if (title != null) {
                content.append(title);
            }
        }
        return content.toString();
    }
}