工具类,直接拷贝就可以使用
import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.ImageRenderInfo;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.RenderListener;
import com.itextpdf.text.pdf.parser.TextRenderInfo;
import lombok.extern.slf4j.Slf4j;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Utility class that locates the coordinates of a keyword inside a PDF document.
 *
 * <p>Matching is done per rendered text chunk: a chunk matches when its text
 * contains the keyword, so a keyword that the PDF splits across several chunks
 * is not found. The original author's note on this: the keyword in the report
 * must not be bold and should be set in SimSun, font size 9.
 *
 * <p>The class keeps no state between calls; every call builds and returns its
 * own result list.
 *
 * @Author zr
 * @Date 2023/4/25 21:00
 * @Version 1.0
 */
@Slf4j
public class PdfFindKeyCoordinate1 {

    /**
     * Finds the coordinates of a keyword in the PDF file at the given path.
     *
     * <p>The reader opened here is closed before the method returns.
     *
     * @param filePath path (including file name) of the PDF file
     * @param KEY_WORD keyword to search for; must not be null
     * @return one {@code float[3]} per matching text chunk:
     *         {@code [x, y, pageNumber]}; empty if nothing matched
     * @throws IOException if the PDF cannot be read or parsed
     */
    public static List<float[]> getKeyWordsCoordinate(String filePath, final String KEY_WORD) throws IOException {
        PdfReader pdfReader = new PdfReader(filePath);
        try {
            return getKeyWordsCoordinate(pdfReader, KEY_WORD);
        } finally {
            // This overload created the reader, so it is responsible for releasing it.
            pdfReader.close();
        }
    }

    /**
     * Finds the coordinates of a keyword in a PDF supplied as an in-memory stream.
     *
     * <p>The reader opened here is closed before the method returns.
     *
     * @param inputStream stream holding the PDF bytes
     * @param KEY_WORD    keyword to search for; must not be null
     * @return one {@code float[3]} per matching text chunk:
     *         {@code [x, y, pageNumber]}; empty if nothing matched
     * @throws IOException if the PDF cannot be read or parsed
     */
    public static List<float[]> getKeyWordsCoordinate(ByteArrayInputStream inputStream, final String KEY_WORD) throws IOException {
        PdfReader reader = new PdfReader(inputStream);
        try {
            return getKeyWordsCoordinate(reader, KEY_WORD);
        } finally {
            // This overload created the reader, so it is responsible for releasing it.
            reader.close();
        }
    }

    /**
     * Finds the coordinates of a keyword using an already opened reader.
     *
     * <p>Every page is parsed in order (1-based). For each text chunk whose
     * text contains the keyword, the x/y of the bounding rectangle of the
     * chunk's baseline is recorded together with the page number. Note that
     * this is the position of the chunk, not of the keyword inside the chunk.
     *
     * <p>The caller owns {@code pdfReader}; this method does not close it.
     *
     * @param pdfReader opened PDF reader
     * @param KEY_WORD  keyword to search for; must not be null
     * @return a new list with one {@code float[3]} per matching text chunk:
     *         {@code [x, y, pageNumber]}; empty if nothing matched
     * @throws IOException if a page's content cannot be parsed
     */
    public static List<float[]> getKeyWordsCoordinate(PdfReader pdfReader, final String KEY_WORD) throws IOException {
        // Per-call result list: a later call can no longer wipe or corrupt
        // a list that was handed out earlier.
        final List<float[]> coordinates = new ArrayList<float[]>();

        int pageCount = pdfReader.getNumberOfPages();
        PdfReaderContentParser contentParser = new PdfReaderContentParser(pdfReader);

        for (int page = 1; page <= pageCount; page++) {
            // The anonymous listener can only capture a final local.
            final int pageNumber = page;
            contentParser.processContent(pageNumber, new RenderListener() {
                @Override
                public void renderText(TextRenderInfo textRenderInfo) {
                    // Text of the current rendered chunk (not of the whole page).
                    String text = textRenderInfo.getText();
                    if (text == null || !text.contains(KEY_WORD)) {
                        return;
                    }
                    Rectangle2D.Float baselineBounds = textRenderInfo
                            .getBaseline().getBoundingRectange();
                    coordinates.add(new float[] {
                            baselineBounds.x,
                            baselineBounds.y,
                            pageNumber
                    });
                }

                @Override
                public void renderImage(ImageRenderInfo renderInfo) {
                    // Images are irrelevant for keyword lookup.
                }

                @Override
                public void endTextBlock() {
                    // No per-block bookkeeping needed.
                }

                @Override
                public void beginTextBlock() {
                    // No per-block bookkeeping needed.
                }
            });
        }
        return coordinates;
    }
}
扩展:需要用到的maven jar包
<!-- iText 5 core; its com.itextpdf group matches the com.itextpdf.* imports of the utility class. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.10</version>
</dependency>
<!-- NOTE(review): not referenced by the keyword-lookup class shown here; presumably used by other PDF code in the project. Confirm before keeping. -->
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.5.10</version>
</dependency>
<!-- NOTE(review): presumably provides CJK font resources for iText; versioned separately from itextpdf. Verify it is required. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
<!-- NOTE(review): not referenced by the keyword-lookup class shown here. The exclusion below presumably keeps the itextpdf version declared above from being overridden by a transitive one. Confirm. -->
<dependency>
<groupId>org.xhtmlrenderer</groupId>
<artifactId>flying-saucer-pdf-itext5</artifactId>
<version>9.0.3</version>
<exclusions>
<exclusion>
<artifactId>itextpdf</artifactId>
<groupId>com.itextpdf</groupId>
</exclusion>
</exclusions>
</dependency>