Java 通过 URL 在线预览 Word、Excel、PPT、PDF、TXT 文档中的内容【只提取其中的文字】
在页面上显示各种文档中的内容。以下是 servlet 中针对每种文档类型的处理逻辑：
word:
1 BufferedInputStream bis = null;
2 URL url = null;
3 HttpURLConnection httpUrl = null; // 建立链接
4 url = new URL(urlReal);
5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源
6 httpUrl.connect();// 获取网络输入流
7 bis = new BufferedInputStream(httpUrl.getInputStream());
8 String bodyText = null;
9 WordExtractor ex = new WordExtractor(bis);
10 bodyText = ex.getText();
11 response.getWriter().write(bodyText);excel:
1 BufferedInputStream bis = null;
2 URL url = null;
3 HttpURLConnection httpUrl = null; // 建立链接
4 url = new URL(urlReal);
5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源
6 httpUrl.connect();// 获取网络输入流
7 bis = new BufferedInputStream(httpUrl.getInputStream());
8 content = new StringBuffer();
9 HSSFWorkbook workbook = new HSSFWorkbook(bis);
10 for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) {
11 HSSFSheet aSheet = workbook.getSheetAt(numSheets);// 获得一个sheet
12 content.append("/n");
13 if (null == aSheet) {
14 continue;
15 }
16 for (int rowNum = 0; rowNum <= aSheet.getLastRowNum(); rowNum++) {
17 content.append("/n");
18 HSSFRow aRow = aSheet.getRow(rowNum);
19 if (null == aRow) {
20 continue;
21 }
22 for (short cellNum = 0; cellNum <= aRow.getLastCellNum(); cellNum++) {
23 HSSFCell aCell = aRow.getCell(cellNum);
24 if (null == aCell) {
25 continue;
26 }
27 if (aCell.getCellType() == HSSFCell.CELL_TYPE_STRING) {
28 content.append(aCell.getRichStringCellValue()
29 .getString());
30 } else if (aCell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) {
31 boolean b = HSSFDateUtil.isCellDateFormatted(aCell);
32 if (b) {
33 Date date = aCell.getDateCellValue();
34 SimpleDateFormat df = new SimpleDateFormat(
35 "yyyy-MM-dd");
36 content.append(df.format(date));
37 }
38 }
39 }
40 }
41 }
42 response.getWriter().write(content.toString());ppt:
1 BufferedInputStream bis = null;
2 URL url = null;
3 HttpURLConnection httpUrl = null; // 建立链接
4 url = new URL(urlReal);
5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源
6 httpUrl.connect();// 获取网络输入流
7 bis = new BufferedInputStream(httpUrl.getInputStream());
8 StringBuffer content = new StringBuffer("");
9 SlideShow ss = new SlideShow(new HSLFSlideShow(bis));
10 Slide[] slides = ss.getSlides();
11 for (int i = 0; i < slides.length; i++) {
12 TextRun[] t = slides[i].getTextRuns();
13 for (int j = 0; j < t.length; j++) {
14 content.append(t[j].getText());
15 }
16 content.append(slides[i].getTitle());
17 }
18 response.getWriter().write(content.toString());
pdf:
1 BufferedInputStream bis = null;
2 URL url = null;
3 HttpURLConnection httpUrl = null; // 建立链接
4 url = new URL(urlReal);
5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源
6 httpUrl.connect();// 获取网络输入流
7 bis = new BufferedInputStream(httpUrl.getInputStream());
8 PDDocument pdfdocument = null;
9 PDFParser parser = new PDFParser(bis);
10 parser.parse();
11 pdfdocument = parser.getPDDocument();
12 ByteArrayOutputStream out = new ByteArrayOutputStream();
13 OutputStreamWriter writer = new OutputStreamWriter(out);
14 PDFTextStripper stripper = new PDFTextStripper();
15 stripper.writeText(pdfdocument.getDocument(), writer);
16 writer.close();
17 byte[] contents = out.toByteArray();
18 String ts = new String(contents);
19 response.getWriter().write(ts);txt:
1 BufferedReader bis = null;
2 URL url = null;
3 HttpURLConnection httpUrl = null; // 建立链接
4 url = new URL(urlReal);
5 httpUrl = (HttpURLConnection) url.openConnection();// 连接指定的资源
6 httpUrl.connect();// 获取网络输入流
7 bis = new BufferedReader( new InputStreamReader(httpUrl.getInputStream()));
8 StringBuffer buf=new StringBuffer();
9 String temp;
10 while ((temp = bis.readLine()) != null) {
11 buf.append(temp);
12 response.getWriter().write(temp);
13 if(buf.length()>=1000){
14 break;
15 }
16 }
17 bis.close();
















