import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;

import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFOperator;
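/**
 * Utility for removing text watermarks from PDF documents using Apache PDFBox.
 *
 * <p>Required Maven dependency
 * (see https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox):
 *
 * <pre>{@code
 * <dependency>
 *   <groupId>org.apache.pdfbox</groupId>
 *   <artifactId>pdfbox</artifactId>
 *   <version>1.8.3</version>
 * </dependency>
 * }</pre>
 *
 * @author Cloud-Top
 */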
public class PdfUtil {

    /**
     * Default marker used to recognise the watermark string.
     *
     * <p>According to the original author's note, the Chinese watermark phrase meaning
     * "valid until" is read back from the content stream as this garbled text; the
     * underlying encoding problem is still unresolved, so the garbled form is matched.
     */
    private static final String DEFAULT_WATERMARK_TEXT = "eHgó";

    /**
     * Command-line entry point.
     *
     * <p>When at least two arguments are given they are used as the source and target
     * paths; otherwise the original hard-coded sample paths are used.
     *
     * @param args optional {@code [sourcePath, targetPath]}
     */
    public static void main(String[] args) {
        String sourPath = "C:\\Users\\Cloud-Top\\eclipse-workspace\\demo\\src\\main\\resources\\watermark_pdf\\1629680832866.pdf";
        String savePath = "C:\\Users\\Cloud-Top\\1629680832866.pdf";
        if (args != null && args.length >= 2) {
            sourPath = args[0];
            savePath = args[1];
        }
        removePdfWatermark(sourPath, savePath);
    }

    /**
     * Removes the default watermark text from a PDF and writes the result to a new file.
     *
     * <p>I/O and PDF serialisation failures are printed to standard error and not rethrown.
     *
     * @param sourPath path of the source PDF
     * @param savePath path the processed PDF is written to
     */
    public static void removePdfWatermark(String sourPath, String savePath) {
        removePdfWatermark(sourPath, savePath, DEFAULT_WATERMARK_TEXT);
    }

    /**
     * Blanks every string shown by a {@code Tj} operator whose decoded text contains
     * {@code watermarkText}, then saves the document to {@code savePath}.
     *
     * <p>Only {@code Tj} is inspected; strings shown through {@code TJ} arrays are left
     * untouched. The content stream of every page that has contents is rewritten from
     * its parsed tokens. I/O and PDF serialisation failures are printed to standard
     * error and not rethrown; the document is closed in all cases.
     *
     * @param sourPath      path of the source PDF
     * @param savePath      path the processed PDF is written to
     * @param watermarkText text fragment identifying a watermark string, as it appears
     *                      in the decoded content stream; must not be null or empty
     * @throws IllegalArgumentException if {@code watermarkText} is null or empty
     *                                  (an empty fragment would match, and erase, every string)
     */
    public static void removePdfWatermark(String sourPath, String savePath, String watermarkText) {
        if (watermarkText == null || watermarkText.length() == 0) {
            throw new IllegalArgumentException("watermarkText must not be null or empty");
        }
        PDDocument document = null;
        try {
            // Load the source file.
            document = PDDocument.load(new File(sourPath));
            @SuppressWarnings("unchecked") // getAllPages() returns a raw List of PDPage
            List<PDPage> allPages = document.getDocumentCatalog().getAllPages();
            for (PDPage pdPage : allPages) {
                PDStream contents = pdPage.getContents();
                if (contents == null) {
                    // Page without a content stream: nothing to parse or rewrite.
                    continue;
                }
                PDFStreamParser parser = new PDFStreamParser(contents.getStream());
                parser.parse();
                List<Object> tokens = parser.getTokens();
                clearMatchingText(tokens, watermarkText);
                replacePageContents(document, pdPage, tokens);
            }
            document.save(savePath);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (COSVisitorException e) {
            e.printStackTrace();
        } finally {
            closeDocument(document);
        }
    }

    /** Empties the operand of each {@code Tj} operator whose text contains {@code watermarkText}. */
    private static void clearMatchingText(List<Object> tokens, String watermarkText) {
        // Start at 1: an operator at index 0 has no preceding operand to inspect.
        for (int j = 1; j < tokens.size(); j++) {
            Object token = tokens.get(j);
            if (!(token instanceof PDFOperator)) {
                continue;
            }
            // Tj takes a single operand: the string to display.
            if (!"Tj".equals(((PDFOperator) token).getOperation())) {
                continue;
            }
            Object operand = tokens.get(j - 1);
            if (!(operand instanceof COSString)) {
                // Malformed or unexpected operand; leave it alone instead of failing the cast.
                continue;
            }
            COSString previous = (COSString) operand;
            String string = previous.getString();
            // Diagnostic output: shows how each string is decoded, which helps in finding
            // the (possibly garbled) form of the watermark text.
            System.out.println(string);
            if (string.contains(watermarkText)) {
                // Clear the matched content.
                previous.reset();
            }
        }
    }

    /** Writes {@code tokens} into a fresh stream and installs it as the page's contents. */
    private static void replacePageContents(PDDocument document, PDPage page, List<Object> tokens)
            throws IOException {
        PDStream updatedStream = new PDStream(document);
        OutputStream out = updatedStream.createOutputStream();
        try {
            ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
            tokenWriter.writeTokens(tokens);
        } finally {
            // Always release the stream, even when writing fails.
            out.close();
        }
        page.setContents(updatedStream);
    }

    /** Closes {@code document} if it was opened, reporting (not propagating) close failures. */
    private static void closeDocument(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
java remove pdf watermark Java去除PDF水印
转载
java remove pdf watermark Java去除PDF水印
本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
PDF批量加水印 与 去除水印实践
我们可以给PDF加图片水印或文字水印,要去除图片水印,一般只需要删除最后一个图片对象即可。要去除文字水印,需要保证主体内容和文字水印
pdf python ci mysql Image