第一：下载POI。原下载地址为 http://jakarta.apache.org/poi/（POI 现已成为 Apache 顶级项目，主页迁移至 https://poi.apache.org/，旧版本可在其归档页面获取）。下载 poi-bin-3.5-beta4-20081128.zip，解压后把其中的 jar 包引入项目工程（加入 classpath）。

第二:处理Word(Word.java)

import org.apache.poi.hwpf.extractor.WordExtractor;
import java.io.File;
import java.io.InputStream;
 
/**
 * Extracts the text of a legacy binary Word (.doc) document through
 * Apache POI's {@link WordExtractor}.
 */
public class Word {
    /**
     * Demo entry point: prints the text of the hard-coded sample document.
     *
     * @param args ignored
     * @throws Exception if the sample file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c:\\11.doc"));
    }

    /**
     * Returns the text of the document stored at the given path.
     *
     * @param s path of the document to read
     * @return the text reported by the extractor
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        return getContent(new File(s));
    }

    /**
     * Returns the text of the given document file. The file stream opened
     * here is always closed before this method returns.
     *
     * @param f document file to read
     * @return the text reported by the extractor
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream is = new java.io.FileInputStream(f);
        try {
            return getContent(is);
        } finally {
            // This method opened the stream, so it is responsible for releasing it.
            is.close();
        }
    }

    /**
     * Returns the text of the document read from the given stream. The
     * stream is not closed here; it remains owned by the caller.
     *
     * @param is stream positioned at the start of the document
     * @return the text reported by the extractor
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        WordExtractor ex = new WordExtractor(is);
        return ex.getText();
    }
}

 
 

第三:处理Excel(Excel.java)

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFCell;
import java.io.File;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
 
/**
 * Extracts the cell contents of a legacy binary Excel (.xls) workbook as
 * plain text through Apache POI's HSSF API.
 */
public class Excel {
    /**
     * Demo entry point: prints the text of the hard-coded sample workbook.
     *
     * @param args ignored
     * @throws Exception if the sample file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c:\\22.xls"));
    }

    /**
     * Returns the text of the workbook stored at the given path.
     *
     * @param s path of the workbook to read
     * @return the extracted text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        return getContent(new File(s));
    }

    /**
     * Returns the text of the given workbook file. The file stream opened
     * here is always closed before this method returns.
     *
     * @param f workbook file to read
     * @return the extracted text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream is = new java.io.FileInputStream(f);
        try {
            return getContent(is);
        } finally {
            // This method opened the stream, so it is responsible for releasing it.
            is.close();
        }
    }

    /**
     * Returns the text of the workbook read from the given stream.
     *
     * <p>A newline is emitted before every sheet and before every row index
     * from 0 to the sheet's last row. String cells contribute their text,
     * date-formatted numeric cells are rendered as {@code yyyy-MM-dd}, and
     * other numeric cells contribute their numeric value. Cells of any other
     * type are skipped. Cell values are concatenated without a separator.
     * The stream is not closed here; it remains owned by the caller.
     *
     * @param is stream positioned at the start of the workbook
     * @return the extracted text
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        StringBuilder content = new StringBuilder();
        // One formatter per call: avoids rebuilding it for every date cell
        // while staying confined to the calling thread.
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        HSSFWorkbook workbook = new HSSFWorkbook(is);
        int sheetCount = workbook.getNumberOfSheets();
        for (int numSheets = 0; numSheets < sheetCount; numSheets++) {
            HSSFSheet aSheet = workbook.getSheetAt(numSheets);
            content.append("\n");
            if (aSheet == null) {
                continue;
            }
            for (int rowNum = 0; rowNum <= aSheet.getLastRowNum(); rowNum++) {
                content.append("\n");
                HSSFRow aRow = aSheet.getRow(rowNum);
                if (aRow == null) {
                    continue;
                }
                // getLastCellNum() is documented as the last cell index PLUS
                // ONE, so the bound is exclusive.
                int cellLimit = aRow.getLastCellNum();
                for (int cellNum = 0; cellNum < cellLimit; cellNum++) {
                    HSSFCell aCell = aRow.getCell(cellNum);
                    if (aCell != null) {
                        appendCell(content, aCell, df);
                    }
                }
            }
        }
        return content.toString();
    }

    /** Appends the text form of one string or numeric cell; other cell types are ignored. */
    private static void appendCell(StringBuilder content, HSSFCell aCell, SimpleDateFormat df) {
        int type = aCell.getCellType();
        if (type == HSSFCell.CELL_TYPE_STRING) {
            content.append(aCell.getRichStringCellValue().getString());
        } else if (type == HSSFCell.CELL_TYPE_NUMERIC) {
            if (HSSFDateUtil.isCellDateFormatted(aCell)) {
                Date date = aCell.getDateCellValue();
                content.append(df.format(date));
            } else {
                // Plain numbers used to be dropped silently; keep them in the output.
                content.append(formatNumber(aCell.getNumericCellValue()));
            }
        }
    }

    /** Renders whole numbers without a trailing ".0"; everything else uses the default double form. */
    private static String formatNumber(double value) {
        if (value == Math.rint(value) && Math.abs(value) < 1e15) {
            return String.valueOf((long) value);
        }
        return String.valueOf(value);
    }
}

第四:处理PowerPoint(PowerPoint.java)

import java.io.File;
import java.io.InputStream;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.model.Slide;
import org.apache.poi.hslf.usermodel.SlideShow;
 
/**
 * Extracts the slide text of a legacy binary PowerPoint (.ppt) presentation
 * through Apache POI's HSLF API.
 */
public class PowerPoint {
    /**
     * Demo entry point: prints the text of the hard-coded sample presentation.
     *
     * @param args ignored
     * @throws Exception if the sample file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        System.out.println(getContent("c:\\33.ppt"));
    }

    /**
     * Returns the text of the presentation stored at the given path.
     *
     * @param s path of the presentation to read
     * @return the extracted text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(String s) throws Exception {
        return getContent(new File(s));
    }

    /**
     * Returns the text of the given presentation file. The file stream
     * opened here is always closed before this method returns.
     *
     * @param f presentation file to read
     * @return the extracted text
     * @throws Exception if the file cannot be opened or parsed
     */
    public static String getContent(File f) throws Exception {
        InputStream is = new java.io.FileInputStream(f);
        try {
            return getContent(is);
        } finally {
            // This method opened the stream, so it is responsible for releasing it.
            is.close();
        }
    }

    /**
     * Returns the text of the presentation read from the given stream.
     *
     * <p>For each slide, the text of every text run is appended, followed by
     * the slide title when the slide has one. Nothing is inserted between
     * the pieces. The stream is not closed here; it remains owned by the
     * caller.
     *
     * @param is stream positioned at the start of the presentation
     * @return the extracted text
     * @throws Exception if the stream cannot be parsed
     */
    public static String getContent(InputStream is) throws Exception {
        StringBuilder content = new StringBuilder();
        SlideShow ss = new SlideShow(new HSLFSlideShow(is));
        Slide[] slides = ss.getSlides();
        for (int i = 0; i < slides.length; i++) {
            TextRun[] runs = slides[i].getTextRuns();
            for (int j = 0; j < runs.length; j++) {
                content.append(runs[j].getText());
            }
            // The HSLF Javadoc documents getTitle() as returning null for a
            // slide without a title; appending that would emit the literal
            // text "null".
            String title = slides[i].getTitle();
            if (title != null) {
                // NOTE(review): the title text is presumably also one of the
                // text runs above, so it may appear twice - confirm whether
                // the duplicate is intended.
                content.append(title);
            }
        }
        return content.toString();
    }
}