package office;

/*
 * Required jars:
 * poi-3.0.2-FINAL-20080204.jar
 * poi-contrib-3.0.2-FINAL-20080204.jar
 * poi-scratchpad-3.0.2-FINAL-20080204.jar
 * poi-3.5-beta6-20090622.jar
 * geronimo-stax-api_1.0_spec-1.0.jar
 * ooxml-schemas-1.0.jar
 * openxml4j-bin-beta.jar
 * poi-ooxml-3.5-beta6-20090622.jar
 * xmlbeans-2.3.0.jar
 * dom4j-1.6.1.jar
 */

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;

/**
 * Extracts plain text from Word (.doc / .docx) and Excel (.xls / .xlsx) documents
 * using Apache POI.
 *
 * <p>Spreadsheet text is the concatenation of every non-null cell value, sheet by sheet,
 * row by row, cell by cell, with no separator inserted between values.
 *
 * <p>Written 2011-11-10.
 *
 * @author JavaAlpha
 * @version V 1.0
 */
public class WordAndExcelExtractor {

    /**
     * Demo entry point: extracts and prints the text of three hard-coded sample files.
     * Any failure is reported by printing the stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            String wordFile = "E:/word07.docx";
            String wordText2007 = WordAndExcelExtractor.extractTextFromDOC2007(wordFile);
            System.out.println("wordText2007=======" + wordText2007);

            InputStream is = new FileInputStream("E:/1.xls");
            String excelText;
            try {
                excelText = WordAndExcelExtractor.extractTextFromXLS(is);
            } finally {
                // This method opened the stream, so it is responsible for closing it.
                is.close();
            }
            System.out.println("text2003==========" + excelText);

            String excelFile = "E:/2.xlsx";
            String excelText2007 = WordAndExcelExtractor.extractTextFromXLS2007(excelFile);
            System.out.println("excelText2007==========" + excelText2007);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the plain text of a Word 2003 (.doc) document.
     *
     * @param is stream positioned at the start of the .doc file; this method does not
     *           close it explicitly, so the caller remains responsible for it
     * @return the document text as returned by {@link WordExtractor#getText()}
     * @throws IOException if the stream cannot be read as a Word document
     */
    public static String extractTextFromDOC(InputStream is) throws IOException {
        WordExtractor extractor = new WordExtractor(is);
        return extractor.getText();
    }

    /**
     * Extracts the plain text of a Word 2007 (.docx) document.
     *
     * @param fileName path of the .docx file
     * @return the document text
     * @throws IOException        if the file cannot be opened or read
     * @throws OpenXML4JException if the OOXML package is malformed
     * @throws XmlException       if the document XML cannot be parsed
     */
    public static String extractTextFromDOC2007(String fileName)
            throws IOException, OpenXML4JException, XmlException {
        OPCPackage opcPackage = POIXMLDocument.openPackage(fileName);
        try {
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            return extractor.getText();
        } finally {
            // revert() releases the file without writing anything back; the package is
            // only read here, so nothing must be saved.
            opcPackage.revert();
        }
    }

    /**
     * Extracts the text of an Excel 2003 (.xls) workbook by concatenating all cell values.
     *
     * <p>NOTE(review): cells that are neither numeric nor boolean are read with
     * {@code getStringCellValue()}; POI may reject that call for some cell types
     * (e.g. formula cells with a non-string result, error cells) - confirm against the
     * POI version in use.
     *
     * @param is stream positioned at the start of the .xls file; the caller owns it
     * @return concatenated cell values, without separators
     * @throws IOException if the stream cannot be read as a workbook
     */
    private static String extractTextFromXLS(InputStream is) throws IOException {
        StringBuilder content = new StringBuilder();
        HSSFWorkbook workbook = new HSSFWorkbook(is);

        for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
            HSSFSheet sheet = workbook.getSheetAt(sheetIndex);
            if (sheet == null) {
                continue;
            }
            // getLastRowNum() is the 0-based index of the last row, hence "<=".
            for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                HSSFRow row = sheet.getRow(rowIndex);
                if (row == null) {
                    continue;
                }
                // getLastCellNum() is the last cell index PLUS ONE (-1 for an empty row),
                // hence "<" rather than "<=".
                int cellCount = row.getLastCellNum();
                for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {
                    HSSFCell cell = row.getCell(cellIndex);
                    if (cell != null) {
                        appendCellValue(content, cell);
                    }
                }
            }
        }
        return content.toString();
    }

    /** Appends the value of one .xls cell to {@code content}, chosen by the cell's type. */
    private static void appendCellValue(StringBuilder content, HSSFCell cell) {
        int cellType = cell.getCellType();
        if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
            content.append(cell.getNumericCellValue());
        } else if (cellType == HSSFCell.CELL_TYPE_BOOLEAN) {
            content.append(cell.getBooleanCellValue());
        } else {
            content.append(cell.getStringCellValue());
        }
    }

    /**
     * Extracts the text of an Excel 2007 (.xlsx) workbook by concatenating all cell values.
     *
     * <p>NOTE(review): cells that are neither numeric nor boolean are read with
     * {@code getStringCellValue()}; POI may reject that call for some cell types
     * (e.g. formula cells with a non-string result, error cells) - confirm against the
     * POI version in use.
     *
     * @param fileName path of the .xlsx file
     * @return concatenated cell values, without separators
     * @throws Exception if the file cannot be opened or parsed as a workbook
     */
    private static String extractTextFromXLS2007(String fileName) throws Exception {
        StringBuilder content = new StringBuilder();
        // Open the package explicitly so it can be released once reading is done.
        OPCPackage opcPackage = POIXMLDocument.openPackage(fileName);
        try {
            XSSFWorkbook workbook = new XSSFWorkbook(opcPackage);

            for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) {
                XSSFSheet sheet = workbook.getSheetAt(sheetIndex);
                if (sheet == null) {
                    continue;
                }
                // getLastRowNum() is the 0-based index of the last row, hence "<=".
                for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                    XSSFRow row = sheet.getRow(rowIndex);
                    if (row == null) {
                        continue;
                    }
                    // getLastCellNum() is the last cell index PLUS ONE (-1 for an empty
                    // row), hence "<" rather than "<=".
                    int cellCount = row.getLastCellNum();
                    for (int cellIndex = 0; cellIndex < cellCount; cellIndex++) {
                        XSSFCell cell = row.getCell(cellIndex);
                        if (cell != null) {
                            appendCellValue(content, cell);
                        }
                    }
                }
            }
        } finally {
            // Read-only use: release the file without saving anything back.
            opcPackage.revert();
        }
        return content.toString();
    }

    /** Appends the value of one .xlsx cell to {@code content}, chosen by the cell's type. */
    private static void appendCellValue(StringBuilder content, XSSFCell cell) {
        int cellType = cell.getCellType();
        if (cellType == XSSFCell.CELL_TYPE_BOOLEAN) {
            content.append(cell.getBooleanCellValue());
        } else if (cellType == XSSFCell.CELL_TYPE_NUMERIC) {
            content.append(cell.getNumericCellValue());
        } else {
            content.append(cell.getStringCellValue());
        }
    }
}
Java解析word2007、Excel2003和Excel2007
转载本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
上一篇:js动态设置div的值
下一篇:VM8.0下安装遇到了问题“Windows cannot read the <ProductKey> setting from the unattend answer file”解决方法
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
Java Excel导入导出
Java的两种方式操作Excel
数据 java excel 导出数据 -
Java解析word2007、Excel2003和Excel2007
package office;/** * @author JavaAlpha * @date 2011-11-10 * @version V 1.0 *//* * 需要的jar包:
excel java import string openxml -
POI - 读取Excel2003、Excel2007或更高级的兼容性问题
我们使用POI中的HSSFWorkbook来读取Excel数据。public void test(File file) throws IOException { InputStream inp = …
different Documents calling appears supplied -
EXCEL2007紧急恢复excel2007;恢复excel
-
gridview导出到excel2007
gridview导出到excel2007
excel gridview -
poi创建Excel2007 (五)
poi创建2003与2007还是有点区别的 2003需要一个包 而2007需要好几个包
java poi excel 解析 apache 创建表