Java实现PDF转图片的三种方法详解
作者:工贼sk
这篇文章主要为大家详细介绍了Java实现PDF转图片的三种方法,文中的示例代码讲解详细,具有一定的借鉴价值,有需要的小伙伴可以参考一下
使用PDFbox获取pdf文件的内容和图片
<!-- 依赖 -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.23</version>
</dependency>
获取传入的pdf的图片
我这里是分页读取的,更灵活
/**
 * Extracts every embedded image from a PDF, page by page, and writes each one to disk as a PNG.
 *
 * <p>Output files are named {@code image_<page>页<counter>.png}, where {@code <page>} is the
 * 1-based page number and {@code <counter>} is a running index across the whole document.
 *
 * @param args unused
 * @throws IOException if the PDF cannot be read or an image cannot be decoded or written
 */
public static void main(String[] args) throws IOException {
    int count = 0;
    File file = new File("D:\\Data\\电子图书馆_使用文档.pdf");
    // try-with-resources releases the document and its file handle even when an image fails
    // to decode or write, so the PDF is never left locked.
    try (PDDocument document = PDDocument.load(file)) {
        int allPages = document.getNumberOfPages();
        for (int i = 0; i < allPages; i++) {
            PDResources resources = document.getPage(i).getResources();
            if (resources == null) {
                // A page without a resource dictionary cannot reference any image.
                continue;
            }
            for (COSName key : resources.getXObjectNames()) {
                if (!resources.isImageXObject(key)) {
                    continue;
                }
                PDImageXObject image = (PDImageXObject) resources.getXObject(key);
                BufferedImage bImage = image.getImage();
                ImageIO.write(bImage, "PNG", new File("D:\\image\\"+"image_"+ (i+1) + "页" + count + ".png"));
                count++;
            }
        }
    }
}
获取传入的pdf的文字内容
同样的分页读取
/**
 * Prints the text of a PDF one page at a time.
 *
 * <p>The document is split into single-page documents and the text of each is extracted
 * separately.
 *
 * @param args unused
 * @throws IOException if the PDF cannot be read, split or parsed
 */
public static void main(String[] args) throws IOException {
    File file = new File("D:\\Data\\Java课件\\xxx.pdf");
    try (PDDocument source = PDDocument.load(file)) {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        List<PDDocument> pages = new Splitter().split(source);
        try {
            for (int i = 0; i < pages.size(); i++) {
                String text = pdfStripper.getText(pages.get(i));
                System.out.println("第"+(i+1)+"页内容:"+text);
            }
        } finally {
            // Every single-page document is its own PDDocument and must be closed; this runs
            // before the source document is closed by the enclosing try-with-resources.
            for (PDDocument page : pages) {
                page.close();
            }
        }
    }
}
读取pdf中的图片信息
package org.fzzn.component.ai.guard.duplicatechecking.tool;
import cn.hutool.core.collection.CollUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.fzzn.component.ai.guard.AiGuardManager;
import org.fzzn.component.ai.guard.duplicatechecking.dto.CheckingDoc;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* pdf中图片提取器
*
* @author inkef
* @since 2024/11/7 17:50
*/
@Slf4j
public class PdfImageExtractor {
private List<CheckingDoc> imgList;
public List<CheckingDoc> extractAndUploadImages(byte[] pdf) throws IOException {
imgList = new ArrayList<>();
PDDocument document = PDDocument.load(pdf);
PDPageTree pages = document.getDocumentCatalog().getPages();
for (PDPage page : pages) {
PDResources resources = page.getResources();
for (COSName name : resources.getXObjectNames()) {
PDXObject xobject = resources.getXObject(name);
if (xobject instanceof PDImageXObject) {
if (!name.getName().startsWith("Image")) {
continue;
}
PDImageXObject image = (PDImageXObject) xobject;
Integer number = Integer.parseInt(name.getName().substring(5));
String fileType = "png";
String fileName = number + "." + fileType;
byte[] imageBytes = bufferedImageToByteArray(image.getImage(), fileType);
String url = AiGuardManager.me().getHandler().handleUploadFile(imageBytes, fileName);
CheckingDoc doc = new CheckingDoc();
doc.setId((long) number);
doc.setUrl(url);
imgList.add(doc);
}
}
}
return imgList;
}
/**
* 将 BufferedImage 转换为 byte[]
*
* @param bufferedImage 要转换的 BufferedImage 对象
* @param formatName 图像格式(例如 "png", "jpg")
* @return 字节数组
*/
private static byte[] bufferedImageToByteArray(BufferedImage bufferedImage, String formatName) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
ImageIO.write(bufferedImage, formatName, baos);
baos.flush();
byte[] imageBytes = baos.toByteArray();
baos.close();
return imageBytes;
} catch (IOException e) {
log.error("bufferedImageToByteArray error", e);
}
return null;
}
/**
* 解析返回pdf中指定的 xrefs 列表中的图片
*
* @param pdf
* @param xrefs
* @return
* @throws IOException
*/
public List<CheckingDoc> extractAndUploadImages(byte[] pdf, List<Integer> xrefs) throws IOException {
// 是否提取全部,如果没有指定,提取全部
Boolean isExtractAll = CollUtil.isEmpty(xrefs);
imgList = new ArrayList<>();
PDDocument document = PDDocument.load(pdf);
PDPageTree pages = document.getDocumentCatalog().getPages();
for (PDPage page : pages) {
PDResources resources = page.getResources();
for (COSName name : resources.getXObjectNames()) {
PDXObject xobject = resources.getXObject(name);
if (xobject instanceof PDImageXObject) {
if (!name.getName().startsWith("Image")) {
continue;
}
PDImageXObject image = (PDImageXObject) xobject;
Integer number = Integer.parseInt(name.getName().substring(5));
// 如果不在指定的xrefs中,跳过
if(!isExtractAll && !xrefs.contains(number)){
continue;
}
String fileType = "png";
String fileName = number + "." + fileType;
byte[] imageBytes = bufferedImageToByteArray(image.getImage(), fileType);
String url = AiGuardManager.me().getHandler().handleUploadFile(imageBytes, fileName);
CheckingDoc doc = new CheckingDoc();
doc.setId((long) number);
doc.setUrl(url);
imgList.add(doc);
}
}
}
return imgList;
}
}
Java实现PDF转图片的三种方法
提示:生成图片以后需要将文件流关闭,不然删除文件会删除失败。很多人不知道怎么将pdf文件转换成图片格式,而且网上有很多例子是跑不通的,同时也是为了方便自己在用到该需求的时候能够快速地写出来,所以整理了几种pdf转换成图片的方法工具类。
一、使用开源库Apache PDFBox将PDF转换为图片
1、引入依赖库
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>fontbox</artifactId>
<version>2.0.9</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.9</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-logging/commons-logging -->
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.2</version>
</dependency>
2、实现pdf转换图片工具类(多页pdf会生成多页的图片,后缀会生成图片的位置序号)
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
public class Pdf2Png {
    /**
     * Renders every page of a PDF to an image file using PDFBox.
     *
     * <p>Page {@code n} (1-based) is written next to the PDF as {@code <filename>_<n>.<type>}.
     * I/O failures are printed to stderr and not rethrown.
     *
     * @param fileAddress directory containing the PDF, e.g. C:\\Users\\user\\Desktop\\test
     * @param filename    PDF file name without the extension
     * @param type        image format, png or jpg
     */
    public static void pdf2png(String fileAddress, String filename, String type) {
        long startTime = System.currentTimeMillis();
        // File(parent, child) inserts the platform separator, so this also works off Windows.
        File file = new File(fileAddress, filename + ".pdf");
        // try-with-resources closes the document; otherwise the PDF stays locked and cannot
        // be deleted afterwards.
        try (PDDocument doc = PDDocument.load(file)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                // 144 dpi; higher values are sharper but slower to render
                BufferedImage image = renderer.renderImageWithDPI(i, 144);
                ImageIO.write(image, type, new File(fileAddress, filename + "_" + (i + 1) + "." + type));
            }
            long endTime = System.currentTimeMillis();
            System.out.println("共耗时:" + ((endTime - startTime) / 1000.0) + "秒"); // elapsed time
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        pdf2png("C:\\Users\\user\\Desktop\\test", "测试", "png");
    }
}
使用Apache PDFBox将PDF转换为图片成功

3、按照固定页数来将pdf转换成图片的工具类(自由选择pdf转换图片的页数)
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
public class Pdf2Png {
    /**
     * Renders a range of PDF pages to image files.
     *
     * <p>Page index {@code i} (0-based) is written next to the PDF as
     * {@code <filename>_<i+1>.<type>}. I/O failures are printed to stderr and not rethrown.
     *
     * @param fileAddress  directory containing the PDF, e.g. C:\\Users\\user\\Desktop\\test
     * @param filename     PDF file name without the extension
     * @param indexOfStart first page index to convert, 0-based (negative values are treated as 0)
     * @param indexOfEnd   page index at which to stop (exclusive); -1 converts through the last page
     * @param type         image format, png or jpg
     */
    public static void pdf2png(String fileAddress,String filename,int indexOfStart,int indexOfEnd,String type) {
        long startTime = System.currentTimeMillis();
        // File(parent, child) inserts the platform separator, so this also works off Windows.
        File file = new File(fileAddress, filename + ".pdf");
        // try-with-resources closes the document so the PDF is not left locked.
        try (PDDocument doc = PDDocument.load(file)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            int pageCount = doc.getNumberOfPages();
            int start = Math.max(indexOfStart, 0);
            // A negative end means "all pages", as documented; also clamp an end past the last
            // page so the renderer is never asked for a page that does not exist.
            int end = (indexOfEnd < 0 || indexOfEnd > pageCount) ? pageCount : indexOfEnd;
            for (int i = start; i < end; i++) {
                // 144 dpi; higher values are sharper but slower to render
                BufferedImage image = renderer.renderImageWithDPI(i, 144);
                ImageIO.write(image, type, new File(fileAddress, filename + "_" + (i + 1) + "." + type));
            }
            long endTime = System.currentTimeMillis();
            System.out.println("共耗时:" + ((endTime - startTime) / 1000.0) + "秒"); // elapsed time
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        pdf2png("C:\\Users\\user\\Desktop\\test", "思泰得流式检测报告-00420299-任蛆小-RA202302100117",2,3, "png");
    }
}
自由页数转换成功

二、使用PDF Box将多页的pdf逐页转换成多张图片的方法
1、引入PDF Box需要的依赖
<dependency>
<groupId>net.sf.cssbox</groupId>
<artifactId>pdf2dom</artifactId>
<version>1.7</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.12</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>2.0.12</version>
</dependency>
2、编写将多页PDF转换成多张图片的工具类
import com.lowagie.text.pdf.PdfReader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
public class Pdf2Png {
    /**
     * Converts every page of a PDF into a PNG image.
     *
     * <p>Page {@code n} (1-based) is written to the PDF's own directory as
     * {@code <pdf name without extension>_<n>.png}. I/O failures are printed to stderr and
     * not rethrown.
     *
     * @param pdfFilePath full path of the PDF, e.g. C:\\Users\\user\\Desktop\\test\\1234.pdf
     * @param dpi         render resolution; higher values are sharper but slower
     */
    public static void pdf2Image(String pdfFilePath, int dpi) {
        long startTime = System.currentTimeMillis();
        File file = new File(pdfFilePath);
        String pdfName = file.getName();
        int dot = pdfName.lastIndexOf('.');
        // Strip the extension if there is one; a name without a dot is used as-is.
        String imagePdfName = dot < 0 ? pdfName : pdfName.substring(0, dot);
        // May be null for a bare file name; File(null, child) then resolves against the
        // working directory instead of producing a literal "null" directory.
        File outputDir = file.getParentFile();
        // try-with-resources closes the document so the PDF is not left locked.
        try (PDDocument pdDocument = PDDocument.load(file)) {
            PDFRenderer renderer = new PDFRenderer(pdDocument);
            // The already-loaded document knows its page count; no second reader is needed.
            int pages = pdDocument.getNumberOfPages();
            for (int i = 0; i < pages; i++) {
                File dstFile = new File(outputDir, imagePdfName + "_" + (i + 1) + ".png");
                BufferedImage image = renderer.renderImageWithDPI(i, dpi);
                ImageIO.write(image, "png", dstFile);
            }
            long endTime = System.currentTimeMillis();
            System.out.println("共耗时:" + ((endTime - startTime) / 1000.0) + "秒"); // elapsed time
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
三、使用文件流整个pdf转换成图片 (生成图片,并将生成的图片路径返回)
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.*;
public class Pdf2Png {
    /**
     * Converts every page of a PDF, read through a file stream, into an image and reports
     * where each image was written.
     *
     * <p>Failures are printed to stderr and not rethrown; the images produced before the
     * failure are still returned.
     *
     * @param fileAddress directory containing the PDF, e.g. C:\\Users\\user\\Desktop\\test
     * @param filename    PDF file name without the extension
     * @param type        image format, png or jpg
     * @return one map per generated image with keys {@code fileName} and {@code filePath}
     */
    public static List<Map<String, String>> pdfToImage(String fileAddress, String filename, String type) {
        long startTime = System.currentTimeMillis();
        List<Map<String, String>> list = new ArrayList<>();
        // File(parent, child) inserts the platform separator, so this also works off Windows.
        File pdfFile = new File(fileAddress, filename + ".pdf");
        // Higher dpi gives sharper images but slower conversion.
        int dpi = 296;
        // Both the stream and the document must be closed, otherwise the PDF stays in use and
        // cannot be deleted. try-with-resources also avoids a NullPointerException from
        // close() when opening or loading fails.
        try (FileInputStream inputStream = new FileInputStream(pdfFile);
             PDDocument pdDocument = PDDocument.load(inputStream)) {
            PDFRenderer renderer = new PDFRenderer(pdDocument);
            int pageCount = pdDocument.getNumberOfPages();
            for (int i = 0; i < pageCount; i++) {
                String fileName = filename + "_" + (i + 1) + "." + type;
                File imageFile = new File(fileAddress, fileName);
                BufferedImage image = renderer.renderImageWithDPI(i, dpi);
                ImageIO.write(image, type, imageFile);
                Map<String, String> resultMap = new HashMap<>();
                resultMap.put("fileName", fileName);
                resultMap.put("filePath", imageFile.getPath()); // image path
                list.add(resultMap);
            }
            long endTime = System.currentTimeMillis();
            System.out.println("共耗时:" + ((endTime - startTime) / 1000.0) + "秒"); // elapsed time
        } catch (Exception e) {
            // Broad catch kept on purpose: callers rely on getting the partial list back
            // rather than an exception.
            e.printStackTrace();
        }
        return list;
    }

    public static void main(String[] args) throws FileNotFoundException {
        pdfToImage("C:\\Users\\user\\Desktop\\test", "测试", "png");
    }
}
pdf转图片
Apache PDFBox 库是一个开源、用于操作 PDF 文档的 Java 工具库。PDFBox 允许创建新的 PDF 文档、操作现有文档,以及从文档中提取内容。获取 Apache PDFBox 下载地址,目前版本 2.0.25,核心 jar 是 pdfbox-2.0.25.jar,其他还有几个 jar 可以根据需要进行导入。
pdf合并
package pdfUtity;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import java.io.File;
import java.io.IOException;
import java.util.Scanner;
//import org.apache.pdfbox.util.PDFMergerUtility;
public class pdfMerge {
    /**
     * Lists the PDF files directly inside a folder, sorted by name.
     *
     * @param fileAddress folder to scan
     * @return names of the regular files ending in {@code .pdf} (case-insensitive)
     * @throws IOException if the path is not a directory or cannot be listed
     */
    private static String[] getPdfs(String fileAddress) throws IOException {
        File folder = new File(fileAddress);
        if (!folder.isDirectory()) {
            throw new IOException("输入的路径有问题");
        }
        // Only real *.pdf files are merge sources; sub-directories and other file types
        // would make the merge fail.
        String[] pdfs = folder.list(new java.io.FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                // regionMatches returns false for names shorter than ".pdf" (negative offset).
                return name.regionMatches(true, name.length() - 4, ".pdf", 0, 4)
                        && new File(dir, name).isFile();
            }
        });
        if (pdfs == null) {
            // File.list returns null when the directory cannot be read.
            throw new IOException("输入的路径有问题");
        }
        // File.list gives no ordering guarantee; sort so the merge order is predictable.
        java.util.Arrays.sort(pdfs);
        return pdfs;
    }

    /**
     * Asks for a folder on stdin and merges all PDFs in it into {@code javaweb2020.pdf}.
     */
    public static void main(String[] args) throws Exception {
        Scanner in = new Scanner(System.in);
        PDFMergerUtility mergePdf = new PDFMergerUtility();
        System.out.println("请输入要合并的PDF文件所在的文件夹路径");
        String fileAddress = in.nextLine();
        System.out.println("你输入的路径是:" + fileAddress);
        String destinationFileName = "javaweb2020.pdf";
        String[] pdfs = getPdfs(fileAddress);
        if (pdfs.length == 0) {
            System.out.println("该文件夹下没有PDF文件");
            return;
        }
        for (String pdf : pdfs) {
            mergePdf.addSource(fileAddress + File.separator + pdf);
        }
        mergePdf.setDestinationFileName(destinationFileName);
        System.out.println("合并比较费时间,请等待个几分钟吧!");
        mergePdf.mergeDocuments();
        System.out.print("合并完成");
    }
}
pdf拆分为单页
package pdfUtity;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdmodel.PDDocument;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Iterator;
public class pdfSplitter {
    /**
     * Splits a PDF into single-page documents and saves each as {@code <n>.pdf} (1-based).
     *
     * @throws IOException if the PDF cannot be loaded, split or saved
     */
    public static void main(String[] args)throws IOException {
        // Load the PDF to be split
        File file = new File("d://desktopfile//pdfs//1.pdf");
        try (PDDocument document = PDDocument.load(file)) {
            // Each element of the list is a single-page document
            List<PDDocument> pages = new Splitter().split(document);
            try {
                int i = 1;
                for (PDDocument pd : pages) {
                    pd.save("d://desktopfile//pdfPhotos//"+i++ +".pdf");
                }
            } finally {
                // The split pages are independent PDDocument instances and must be closed
                // too; this runs before the source document is closed.
                for (PDDocument pd : pages) {
                    pd.close();
                }
            }
            System.out.println("pdf拆分成功");
        }
    }
}
pdf转换为图片
package pdfUtity;/*
* 读取 pdf,将其中的某一页另存为 png 图片
*/
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
public class PDFSavePNG
{
    /**
     * Renders every page of the source PDF as an RGB image at 300 dpi and writes it to a PNG
     * file named after the 0-based page index. Failures are printed to stderr.
     */
    public static void main(String[] args)
    {
        // Resolution used for rendering and passed on to the image writer
        int dpi = 300;
        // try-with-resources closes the source PDF even if rendering or writing fails
        try (PDDocument pdfDocument = PDDocument.load(new File("d://desktopfile//pdfs//1.pdf")))
        {
            PDFRenderer pdfRenderer = new PDFRenderer(pdfDocument);
            int pageCount = pdfDocument.getNumberOfPages();
            for (int pageNumber = 0; pageNumber < pageCount; pageNumber++)
            {
                // Render the page into a BufferedImage
                BufferedImage buffImage = pdfRenderer.renderImageWithDPI(pageNumber, dpi, ImageType.RGB);
                // Write the BufferedImage to a png file
                ImageIOUtil.writeImage(buffImage, "d://desktopfile//pdfPhotos//"+pageNumber+".png", dpi);
            }
        }
        catch (InvalidPasswordException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
图片转换为pdf
package pdfUtity;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
/**
* @author: Serendipity
* Date: 2022/3/16 22:40
* Description:
*/
public class photoToPDF {
    /**
     * File extensions (lower case) accepted when combining images into a PDF.
     */
    private static final List<String> IMAGE_SUFFIX = Arrays.asList("jpg", "png", "jpeg");

    /**
     * Combines the images in a folder into one PDF, one page per image, each page sized to
     * its image. Images are added in file-name order.
     *
     * @param imgFolder folder containing the images, e.g. "D:\\image\\"
     * @param target    path of the PDF to write, e.g. "D:\\image\\merge.pdf"
     * @throws IOException if the folder cannot be listed, an image cannot be read, or the PDF
     *                     cannot be written
     */
    public static void manyImageToOnePdf(String imgFolder, String target) throws IOException {
        // List the folder once: repeated listFiles() calls hit the file system every time and
        // are not guaranteed to return entries in the same order.
        File[] files = new File(imgFolder).listFiles();
        if (files == null) {
            throw new IOException("Not a readable directory: " + imgFolder);
        }
        Arrays.sort(files);
        // try-with-resources closes the document even if reading or drawing an image fails.
        try (PDDocument doc = new PDDocument()) {
            for (File imageFile : files) {
                if (!imageFile.isFile()) {
                    continue;
                }
                String fileName = imageFile.getName();
                int index = fileName.lastIndexOf(".");
                if (index == -1) {
                    continue;
                }
                // Compare the extension case-insensitively so "A.JPG" is accepted too.
                String suffix = fileName.substring(index + 1).toLowerCase(java.util.Locale.ROOT);
                if (!IMAGE_SUFFIX.contains(suffix)) {
                    continue;
                }
                BufferedImage bufferedImage = ImageIO.read(imageFile);
                if (bufferedImage == null) {
                    // ImageIO.read returns null when no reader can decode the file.
                    continue;
                }
                PDImageXObject pdImage = LosslessFactory.createFromImage(doc, bufferedImage);
                float w = pdImage.getWidth();
                float h = pdImage.getHeight();
                PDPage page = new PDPage(new PDRectangle(w, h));
                try (PDPageContentStream contents = new PDPageContentStream(doc, page)) {
                    contents.drawImage(pdImage, 0, 0, w, h);
                }
                System.out.println("Image inserted");
                doc.addPage(page);
            }
            // Save the pdf
            doc.save(target);
        }
    }

    public static void main(String[] args) {
        try {
            manyImageToOnePdf("d://图片","photo.pdf");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
以上就是Java实现PDF转图片的三种方法详解的详细内容,更多关于Java PDF转图片的资料请关注脚本之家其它相关文章!
