Java 读取 PDF 文件内容并替换指定关键字
需要使用到的包
监听类(对需要替换的内容关键词进行匹配)
实体类(保存关键字字体格式信息以及其位置)
工具类(对关键字进行替换)
测试类
需要使用到的包
  

<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13.3</version>
</dependency>
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>

 监听类(对需要替换的内容关键词进行匹配)
 /**
  * Render listener that records every text chunk reported for a page and,
  * separately, the chunks whose text equals the configured keyword
  * (compared ignoring case).
  *
  * <p>The page number, page size and keyword must be set before the page is
  * parsed; {@link #renderText(TextRenderInfo)} dereferences the page size and
  * copies the other values into each recorded item.
  */
 public class KeyWordPositionListener implements RenderListener {

     /** Font height stored on an item when the measured rectangle height is zero. */
     private static final float DEFAULT_FONT_HEIGHT = 12;

     /** Chunks whose whole text equals the keyword, ignoring case. */
     private final List<MatchItem> matches = new ArrayList<>();

     /** Every chunk seen so far, in the order it was rendered. */
     private List<MatchItem> allItems = new ArrayList<>();

     /** Size of the page currently being parsed. */
     private Rectangle curPageSize;

     /** Keyword to look for. */
     private String keyword;

     /** Number of the page currently being parsed. */
     private Integer pageNumber;

     @Override
     public void beginTextBlock() {
         // nothing to do
     }

     /**
      * Records one rendered text chunk and, when its text equals the keyword
      * (ignoring case), also registers it as a match.
      *
      * @param renderInfo the chunk reported by the content parser
      */
     @Override
     public void renderText(TextRenderInfo renderInfo) {
         MatchItem current = describe(renderInfo);
         // Single-chunk hit: the chunk text alone is the keyword.
         if (current.getContent().equalsIgnoreCase(keyword)) {
             matches.add(current);
         }
         // Every chunk is kept so that callers can stitch multi-chunk keywords together.
         allItems.add(current);
     }

     @Override
     public void endTextBlock() {
         // nothing to do
     }

     @Override
     public void renderImage(ImageRenderInfo renderInfo) {
         // nothing to do
     }

     /**
      * Sets the page number stored on every item recorded from now on.
      *
      * @param pageNumber number of the page being parsed
      */
     public void setPageNumber(Integer pageNumber) {
         this.pageNumber = pageNumber;
     }

     /**
      * Sets the keyword to match; the comparison ignores case.
      *
      * @param keyword keyword to look for
      */
     public void setKeyword(String keyword) {
         this.keyword = keyword;
     }

     /**
      * Returns the list of chunks that matched the keyword.
      *
      * @return the internal match list (not a copy)
      */
     public List<MatchItem> getMatches() {
         return matches;
     }

     /**
      * Sets the size of the page being parsed.
      *
      * @param rect page size rectangle
      */
     public void setCurPageSize(Rectangle rect) {
         this.curPageSize = rect;
     }

     /**
      * Returns every chunk recorded so far.
      *
      * @return the internal list of all chunks (not a copy)
      */
     public List<MatchItem> getAllItems() {
         return allItems;
     }

     /**
      * Replaces the list used to store all recorded chunks.
      *
      * @param allItems new backing list
      */
     public void setAllItems(List<MatchItem> allItems) {
         this.allItems = allItems;
     }

     /**
      * Builds the item describing one chunk: its text, the current page
      * number and page size, and the position and dimensions of the bounding
      * rectangle of the chunk's descent line.
      */
     private MatchItem describe(TextRenderInfo renderInfo) {
         Rectangle2D.Float bounds = renderInfo.getDescentLine().getBoundingRectange();

         float height;
         if (bounds.height == 0) {
             height = DEFAULT_FONT_HEIGHT;
         } else {
             height = bounds.height;
         }

         MatchItem described = new MatchItem();
         described.setContent(renderInfo.getText());
         described.setPageNum(pageNumber);
         described.setFontHeight(height);
         described.setFontWidth(bounds.width);
         described.setPageHeight(curPageSize.getHeight());
         described.setPageWidth(curPageSize.getWidth());
         described.setX(bounds.x);
         described.setY(bounds.y);
         return described;
     }
 }
 实体类(保存关键字字体格式信息以及其位置)
 /**
  * Mutable holder for one piece of text found in a PDF: its content, the page
  * it is on, its coordinates, the page dimensions and the recorded font
  * width/height.
  */
 public class MatchItem {

     /** Page number. */
     private Integer pageNum;

     /** X coordinate. */
     private Float x;

     /** Y coordinate. */
     private Float y;

     /** Page width. */
     private Float pageWidth;

     /** Page height. */
     private Float pageHeight;

     /** Matched text. */
     private String content;

     /** Font width. */
     private float fontWidth;

     /** Font height; 12 until explicitly set. */
     private float fontHeight = 12;

     // ---- position ----

     public Integer getPageNum() {
         return this.pageNum;
     }

     public void setPageNum(Integer pageNum) {
         this.pageNum = pageNum;
     }

     public Float getX() {
         return this.x;
     }

     public void setX(Float x) {
         this.x = x;
     }

     public Float getY() {
         return this.y;
     }

     public void setY(Float y) {
         this.y = y;
     }

     // ---- page dimensions ----

     public Float getPageWidth() {
         return this.pageWidth;
     }

     public void setPageWidth(Float pageWidth) {
         this.pageWidth = pageWidth;
     }

     public Float getPageHeight() {
         return this.pageHeight;
     }

     public void setPageHeight(Float pageHeight) {
         this.pageHeight = pageHeight;
     }

     // ---- text and font metrics ----

     public String getContent() {
         return this.content;
     }

     public void setContent(String content) {
         this.content = content;
     }

     public float getFontWidth() {
         return this.fontWidth;
     }

     public void setFontWidth(float fontWidth) {
         this.fontWidth = fontWidth;
     }

     public float getFontHeight() {
         return this.fontHeight;
     }

     public void setFontHeight(float fontHeight) {
         this.fontHeight = fontHeight;
     }

     /**
      * Renders the page number, coordinates, page dimensions and content.
      * The font width and height are not part of the output.
      */
     @Override
     public String toString() {
         StringBuilder text = new StringBuilder("MatchItem{");
         text.append("pageNum=").append(pageNum);
         text.append(", x=").append(x);
         text.append(", y=").append(y);
         text.append(", pageWidth=").append(pageWidth);
         text.append(", pageHeight=").append(pageHeight);
         text.append(", content='").append(content).append('\'');
         text.append('}');
         return text.toString();
     }
 }

 工具类(对关键字进行替换)
 /**
  * Static helpers that search a PDF for a keyword and write a copy in which
  * each occurrence is covered by a white rectangle and overprinted with
  * replacement text.
  */
 public class PdfUtil {

     /**
      * Searches every page of the PDF for the keyword.
      *
      * @param bytes   PDF content
      * @param keyword keyword to search for
      * @return the matches of all pages, in page order
      * @throws Exception if the PDF cannot be read or parsed
      */
     private static List<MatchItem> matchAll(byte[] bytes, String keyword) throws Exception {
         List<MatchItem> items = new ArrayList<>();
         PdfReader reader = new PdfReader(bytes);
         try {
             int pageCount = reader.getNumberOfPages();
             // Page numbers are 1-based.
             for (int page = 1; page <= pageCount; page++) {
                 items.addAll(matchPage(reader, page, keyword));
             }
         } finally {
             // Release the reader even when parsing a page fails.
             reader.close();
         }
         return items;
     }

     /**
      * Searches a single page for the keyword.
      *
      * @param reader     open reader for the PDF
      * @param pageNumber page to search
      * @param keyword    keyword to search for
      * @return the matches found on that page
      * @throws Exception if the page content cannot be processed
      */
     private static List<MatchItem> matchPage(PdfReader reader, Integer pageNumber, String keyword) throws Exception {
         PdfReaderContentParser parser = new PdfReaderContentParser(reader);
         Rectangle pageSize = reader.getPageSize(pageNumber);
         // A fresh listener per page keeps the collected chunks page-local.
         KeyWordPositionListener renderListener = new KeyWordPositionListener();
         renderListener.setKeyword(keyword);
         renderListener.setPageNumber(pageNumber);
         renderListener.setCurPageSize(pageSize);
         parser.processContent(pageNumber, renderListener);
         return findKeywordItems(renderListener, keyword);
     }

     /**
      * Picks the chunks that mark keyword occurrences on one page.
      *
      * <p>If the listener already found chunks equal to the keyword, or the
      * concatenated page text does not contain the keyword, the listener's
      * matches are returned as they are. Otherwise consecutive chunks are
      * stitched together, and the first chunk of every run that spells the
      * keyword is added to the listener's match list.
      *
      * <p>NOTE(review): the containment checks are case-sensitive while the
      * final comparison ignores case, so a keyword spread over several chunks
      * is only found when its case matches exactly.
      *
      * @param renderListener listener holding the chunks of the parsed page
      * @param keyword        keyword to search for
      * @return the listener's match list, possibly extended
      */
     private static List<MatchItem> findKeywordItems(KeyWordPositionListener renderListener, String keyword) {
         List<MatchItem> allItems = renderListener.getAllItems();
         // Join all chunks so the page can be rejected with a single lookup.
         StringBuilder pageText = new StringBuilder();
         for (MatchItem item : allItems) {
             pageText.append(item.getContent());
         }

         List<MatchItem> matches = renderListener.getMatches();
         // Case 1: keyword absent from the page, or whole-chunk matches already found.
         if (pageText.indexOf(keyword) == -1 || !matches.isEmpty()) {
             return matches;
         }

         // Case 2: the keyword is spread over several consecutive chunks.
         StringBuilder candidate = new StringBuilder();
         List<MatchItem> candidateItems = new ArrayList<>();
         for (MatchItem item : allItems) {
             String chunk = item.getContent();
             if (keyword.indexOf(chunk) == -1) {
                 // This chunk cannot be part of the keyword: drop the current run.
                 candidate.setLength(0);
                 candidateItems.clear();
                 continue;
             }
             candidateItems.add(item);
             candidate.append(chunk);
             if (keyword.indexOf(candidate.toString()) == -1) {
                 // The run no longer fits the keyword: restart it at this chunk.
                 candidate.setLength(0);
                 candidate.append(chunk);
                 candidateItems.clear();
                 candidateItems.add(item);
             }
             if (candidate.toString().equalsIgnoreCase(keyword)) {
                 // The first chunk of the run carries the position of the occurrence.
                 matches.add(candidateItems.get(0));
                 candidate.setLength(0);
                 candidateItems.clear();
             }
         }
         return matches;
     }

     /**
      * Writes a copy of the PDF in which every match is covered and overprinted.
      *
      * @param bytes        source PDF content
      * @param outputStream receives the modified PDF
      * @param matchItems   occurrences to replace
      * @param keyWord      text being replaced; its length scales the covering rectangle
      * @param keyWordNew   text printed in its place
      * @throws Exception if the PDF cannot be read, stamped or written
      */
     private static void manipulatePdf(byte[] bytes, OutputStream outputStream, List<MatchItem> matchItems, String keyWord, String keyWordNew) throws Exception {
         PdfReader reader = new PdfReader(bytes);
         try {
             PdfStamper stamper = new PdfStamper(reader, outputStream);

             // Group the matches by page so each page is handled in one pass.
             Map<Integer, List<MatchItem>> itemsByPage = new HashMap<>();
             for (MatchItem item : matchItems) {
                 Integer pageNum = item.getPageNum();
                 List<MatchItem> pageItems = itemsByPage.get(pageNum);
                 if (pageItems == null) {
                     pageItems = new ArrayList<>();
                     itemsByPage.put(pageNum, pageItems);
                 }
                 pageItems.add(item);
             }

             if (!itemsByPage.isEmpty()) {
                 // The font does not depend on the match, so it is created once.
                 BaseFont baseFont = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);
                 for (Map.Entry<Integer, List<MatchItem>> entry : itemsByPage.entrySet()) {
                     PdfContentByte canvas = stamper.getOverContent(entry.getKey());
                     for (MatchItem item : entry.getValue()) {
                         overwrite(canvas, baseFont, item, keyWord, keyWordNew);
                     }
                 }
             }
             stamper.close();
         } finally {
             // Release the reader even when stamping fails.
             reader.close();
         }
     }

     /** Covers one match with a white rectangle and prints the replacement text over it. */
     private static void overwrite(PdfContentByte canvas, BaseFont baseFont, MatchItem item, String keyWord, String keyWordNew) {
         float x = item.getX();
         float y = item.getY();
         float fontWidth = item.getFontWidth();

         // Blank out the original text.
         canvas.saveState();
         canvas.setColorFill(BaseColor.WHITE);
         canvas.rectangle(x, y, fontWidth * keyWord.length(), fontWidth + 2);
         canvas.fill();
         canvas.restoreState();

         // Print the replacement, using the recorded width as the font size.
         canvas.beginText();
         canvas.setFontAndSize(baseFont, fontWidth);
         canvas.setTextMatrix(x, y + fontWidth / 10 + 0.5f);
         canvas.showText(keyWordNew);
         canvas.endText();
     }

     /**
      * Replaces the given text in a PDF.
      *
      * @param srcBytes     source PDF content
      * @param outputStream receives the modified PDF
      * @param keyWord      text to replace
      * @param keyWordNew   replacement text
      * @throws Exception if the PDF cannot be read, searched or written
      */
     public static void pdfReplace(byte[] srcBytes, OutputStream outputStream, String keyWord, String keyWordNew) throws Exception {
         List<MatchItem> matchItems = matchAll(srcBytes, keyWord);
         manipulatePdf(srcBytes, outputStream, matchItems, keyWord, keyWordNew);
     }
 }

 测试类
 /**
  * Manual demo: reads a PDF from disk, replaces a keyword through
  * {@code PdfUtil.pdfReplace} and writes the result to a second file.
  */
 public class test {

     /**
      * Runs the replacement on the hard-coded source and destination paths.
      *
      * @param args unused
      * @throws RuntimeException wrapping any failure while reading, replacing or writing
      */
     public static void main(String[] args) {
         // Source PDF.
         File file = new File("D:\\test.pdf");
         // Destination PDF.
         File destFile = new File("D:\\dest.pdf");
         // Text to look for.
         String keyWord = "请选择";
         // Text to print in its place.
         String keyWordNew = "你爸爸";

         try {
             byte[] bytes = readAll(file);

             ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
             PdfUtil.pdfReplace(bytes, outputStream, keyWord, keyWordNew);

             // The destination is opened only after the replacement succeeded,
             // so a failure does not leave a truncated file behind.
             try (FileOutputStream fileOutputStream = new FileOutputStream(destFile)) {
                 fileOutputStream.write(outputStream.toByteArray());
             }
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
     }

     /**
      * Reads the whole file into memory. A single {@code read} call may return
      * fewer bytes than requested, so the stream is drained in a loop.
      */
     private static byte[] readAll(File file) throws IOException {
         try (FileInputStream inputStream = new FileInputStream(file);
              ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
             byte[] chunk = new byte[8192];
             int count;
             while ((count = inputStream.read(chunk)) != -1) {
                 buffer.write(chunk, 0, count);
             }
             return buffer.toByteArray();
         }
     }
 }