gpt4 book ai didi

java - PDFBox 2.0.8 - 从一个文档中提取图像并在另一个文档中使用它

转载 作者:行者123 更新时间:2023-12-02 11:34:11 26 4
gpt4 key购买 nike

我正在编写一个 Java 应用程序作为模板读取器和编写器。我在处理文本方面取得了成功,但在处理图像方面遇到了一些困难......

获取图像是简单的部分 - 使用扩展 PDFStreamEngine 的类

package readingPdf;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;

public class ImageStripper extends PDFStreamEngine {

ArrayList<Object []> imagesData = null;
public ImageStripper() throws IOException {
// preparing PDFStreamEngine
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
imagesData = new ArrayList<Object[]>();
}

@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
String operation = operator.getName();
if ("Do".equals(operation)) {
COSName objectName = (COSName) operands.get(0);
// get the PDF object
PDXObject xobject = getResources().getXObject(objectName);
// check if the object is an image object
if (xobject instanceof PDImageXObject) {
Object[] imageData = new Object[3];
PDImageXObject image = (PDImageXObject) xobject;

Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();

// position of image in the pdf in terms of user space units
System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY()
+ " in user space units");

imageData[0] = ctmNew.getTranslateX();// xPos
imageData[1] = ctmNew.getTranslateY();// yPos

imageData[2] = image;//Image

imagesData.add(imageData);

} else if (xobject instanceof PDFormXObject) {
PDFormXObject form = (PDFormXObject) xobject;
showForm(form);
}
} else {
super.processOperator(operator, operands);
}
}

public ArrayList<Object[]> getImagesList(){
return imagesData;
}
}

接下来是其实现

public class PDFManager{

private PDFParser parser;
private PDDocument pdDoc;
private PDDocument retDoc;
private COSDocument cosDoc;
private PDPage page;
private String filePath;
private File file;

public PDDocument transferImage() throws IOException {
this.pdDoc = null;
this.cosDoc = null;

file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file, "r"));
parser.parse();
cosDoc = parser.getDocument();
pdDoc = new PDDocument(cosDoc);

//Get Image Data
ImageStripper imageStripper = new ImageStripper();
imageStripper.processPage(pdDoc.getPage(0));
ArrayList<Object []> imageList = imageStripper.getImagesList();

//Close Doc
pdDoc.close();
cosDoc.close();

//Create new PDF Doc
retDoc = new PDDocument();
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
retDoc.addPage(page);

PDPageContentStream cs = new PDPageContentStream(retDoc, page, AppendMode.OVERWRITE, true);

for(int pos = 0; pos < imageList.size() ; pos++) {
Object [] imageData = imageList.get(pos);

float xPos = (float)imageData[0];
float yPos = (float)imageData[1];
PDImageXObject image = (PDImageXObject)imageData[2];
cs.drawImage(image, xPos, yPos);
}

cs.close();
return retDoc;
}

public static void main(String[] args) throws IOException {

PDFManager pdfManager = new PDFManager();

PDDocument doc =pdfManager.ToText("c:\\test\\test.pdf");

doc.save("c:\\test\\test2.pdf");
doc.close();
}
}

现在问题出现在我编写调用cs.drawImage的地方。所有代码执行时都没有任何问题,除了尝试保存新文件时...我收到异常 COSStream 已关闭且无法读取。也许其封闭的 PDDocument 已关闭?

我怀疑仍然存在元数据将图像链接到提取它的原始文档,因为调用 PDImageXobject.createFromFile("c:\\test\\testImage.png", doc) 返回一个完美编写的 PDImageXObject 的新实例。当写入的 PDDocument 被传递到 PDImageXObject 时,我怀疑它以某种方式链接。

我无法将图像保存到临时位置,因为这只是测试 POC。

如有任何帮助,我们将不胜感激

最佳答案

@蒂尔曼·豪谢尔

感谢您的解决方案

我将原始文档的关闭移至一个单独的方法中,我在写入文件后调用该方法

public void closeFiles(){
pdDoc.close();
cosDoc.close();
}

关于java - PDFBox 2.0.8 - 从一个文档中提取图像并在另一个文档中使用它,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49069201/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com