Java实现将Word和PDF转成一张垂直拼接长图的工具类
作者:Uluoyu
这篇文章主要为大家详细介绍了如何使用 Java 编写一个将 Word 和 PDF 转成一张垂直拼接长图的工具类,支持 doc、docx 和 pdf 三种格式,有需要的小伙伴可以了解一下。
1.添加依赖
<!-- Maven dependencies for the DocumentToImageUtil class shown in section 2 of this article. -->
<!-- Apache POI: the Java code imports org.apache.poi.hwpf classes to read legacy .doc files. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.5</version> <!-- only needed when DOCX is involved -->
</dependency>
<!-- Apache PDFBox: the Java code imports Loader, PDDocument and PDFRenderer to load and rasterize PDF pages. -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.5</version>
</dependency>
<!-- Word to PDF: Docx4J core -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-JAXB-ReferenceImpl</artifactId>
<version>8.3.10</version>
</dependency>
<!-- Word to PDF: FO export support (used for PDF rendering) -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-export-fo</artifactId>
<version>8.3.10</version>
</dependency>
<!-- JAXB API and runtime (javax.xml.bind namespace) -->
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.3.1</version>
</dependency>
<dependency>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
<version>2.3.3</version>
</dependency>
<!-- With older Docx4J versions the activation module (javax.activation) may also be required -->
<dependency>
<groupId>com.sun.activation</groupId>
<artifactId>javax.activation</artifactId>
<version>1.2.0</version>
</dependency>
2.Java代码
import lombok.extern.slf4j.Slf4j;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.docx4j.Docx4J;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.w3c.dom.Document;
import javax.imageio.ImageIO;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Utility that turns a document into a single long image.
 *
 * <p>Supports Word (doc/docx) and PDF input. Word documents are first converted
 * to PDF; every PDF page is then rendered and the pages are stacked vertically
 * into one PNG image.
 */
@Slf4j
public class DocumentToImageUtil {

    /** Resolution, in DPI, at which each PDF page is rasterized. */
    private static final float RENDER_DPI = 150f;

    /**
     * Converts a document (Word or PDF) into a vertically stitched PNG.
     *
     * <p>Any {@code fileType} containing "doc" (which also covers "docx") is
     * treated as Word and converted to PDF first; everything else is handed to
     * the PDF renderer as-is.
     *
     * @param content  raw bytes of the document
     * @param fileType file type (doc, docx, pdf); matched case-insensitively
     * @return PNG image bytes, or {@code null} when the PDF has no pages
     * @throws IOException          if any conversion step fails
     * @throws NullPointerException if {@code fileType} is {@code null}
     */
    public byte[] convertToStitchedImage(byte[] content, String fileType) throws IOException {
        java.util.Objects.requireNonNull(fileType, "fileType");
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);

        byte[] pdfContent = content;
        // "docx" contains "doc", so a single check covers both Word formats.
        if (normalizedType.contains("doc")) {
            pdfContent = convertWordToPdf(content, normalizedType);
        }
        return convertPdfToStitchedImage(pdfContent);
    }

    /**
     * Converts a Word document (doc or docx) to PDF bytes.
     *
     * @param wordContent raw bytes of the Word document
     * @param fileType    file type; must contain "docx" or "doc"
     * @return bytes of the generated PDF
     * @throws IOException if the type is unsupported or the conversion fails
     */
    private byte[] convertWordToPdf(byte[] wordContent, String fileType) throws IOException {
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);
        // Check "docx" first: the "doc" test would match it as well.
        if (normalizedType.contains("docx")) {
            return convertDocxToPdf(wordContent);
        }
        if (normalizedType.contains("doc")) {
            return convertDocToPdf(wordContent);
        }
        throw new IOException("不支持的文件类型: " + fileType);
    }

    /** Converts DOCX bytes to PDF bytes using Docx4J. */
    private byte[] convertDocxToPdf(byte[] wordContent) throws IOException {
        try (ByteArrayInputStream bais = new ByteArrayInputStream(wordContent)) {
            WordprocessingMLPackage loadedPackage = WordprocessingMLPackage.load(bais);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            Docx4J.toPDF(loadedPackage, baos);
            return baos.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOCX转PDF失败", e);
        }
    }

    /** Converts legacy DOC bytes to PDF bytes using POI HWPF, an XML serializer and FOP. */
    private byte[] convertDocToPdf(byte[] wordContent) throws IOException {
        try (ByteArrayInputStream bais = new ByteArrayInputStream(wordContent);
             HWPFDocument document = new HWPFDocument(bais)) {
            // 1. Build the XSL-FO DOM with WordToFoConverter.
            //    No font resolver is set; the converter's default is used.
            //    NOTE(review): the original author states setFontResolver is deprecated
            //    or removed in POI 5.2.5 - confirm against the POI release in use.
            Document foDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToFoConverter converter = new WordToFoConverter(foDoc);
            converter.processDocument(document);

            // 2. Serialize the FO DOM to UTF-8 XML bytes.
            ByteArrayOutputStream foBaos = new ByteArrayOutputStream();
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer serializer = tf.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "xml");
            serializer.transform(new DOMSource(foDoc), new StreamResult(foBaos));
            byte[] foBytes = foBaos.toByteArray();

            // 3. Feed the FO bytes through an identity transform into FOP's SAX handler,
            //    which writes the PDF. The current directory serves as FOP's base URI.
            FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI());
            ByteArrayOutputStream pdfBaos = new ByteArrayOutputStream();
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, pdfBaos);
            Transformer transformer = tf.newTransformer();
            Source src = new StreamSource(new ByteArrayInputStream(foBytes));
            Result res = new SAXResult(fop.getDefaultHandler());
            transformer.transform(src, res);
            return pdfBaos.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOC转PDF失败", e);
        }
    }

    /**
     * Renders every page of a PDF and stacks the pages vertically into one PNG.
     *
     * @param pdfContent raw bytes of the PDF
     * @return PNG image bytes, or {@code null} when the PDF has no pages
     * @throws IOException if loading, rendering or encoding fails
     */
    private byte[] convertPdfToStitchedImage(byte[] pdfContent) throws IOException {
        try (ByteArrayInputStream bais = new ByteArrayInputStream(pdfContent);
             RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(bais);
             PDDocument document = Loader.loadPDF(buffer)) {
            int numPages = document.getNumberOfPages();
            if (numPages == 0) {
                // Kept for backward compatibility: callers may rely on null for "no pages".
                return null;
            }

            PDFRenderer renderer = new PDFRenderer(document);
            List<BufferedImage> pageImages = new ArrayList<>(numPages);
            int totalHeight = 0;
            int maxWidth = 0;
            for (int i = 0; i < numPages; i++) {
                BufferedImage pageImage = renderer.renderImageWithDPI(i, RENDER_DPI);
                pageImages.add(pageImage);
                // addExact fails loudly instead of silently wrapping to a negative height.
                totalHeight = Math.addExact(totalHeight, pageImage.getHeight());
                maxWidth = Math.max(maxWidth, pageImage.getWidth());
            }

            BufferedImage stitchedImage = stitchVertically(pageImages, maxWidth, totalHeight);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            // ImageIO.write returns false (without throwing) when no suitable writer exists.
            if (!ImageIO.write(stitchedImage, "png", baos)) {
                throw new IOException("未找到可用的PNG图片写入器");
            }
            return baos.toByteArray();
        } catch (Exception e) {
            throw new IOException("PDF转图片失败", e);
        }
    }

    /**
     * Draws the given page images top to bottom, left-aligned, on a white canvas.
     *
     * @param pageImages page images in display order
     * @param width      canvas width in pixels
     * @param height     canvas height in pixels
     * @return the stitched image
     */
    private static BufferedImage stitchVertically(List<BufferedImage> pageImages, int width, int height) {
        BufferedImage canvas = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2d = canvas.createGraphics();
        try {
            // Fill with white so areas beside narrower pages are not left black.
            g2d.setBackground(java.awt.Color.WHITE);
            g2d.clearRect(0, 0, width, height);
            int yOffset = 0;
            for (BufferedImage pageImage : pageImages) {
                g2d.drawImage(pageImage, 0, yOffset, null);
                yOffset += pageImage.getHeight();
            }
        } finally {
            // Release the graphics context even if drawing fails.
            g2d.dispose();
        }
        return canvas;
    }
}
3.如何使用
// content holds the document bytes; fileType is one of doc, docx, pdf.
DocumentToImageUtil converter = new DocumentToImageUtil();
byte[] imageBytes = converter.convertToStitchedImage(content, fileType.toLowerCase());
到此这篇关于 Java 实现将 Word 和 PDF 转成一张垂直拼接长图的工具类的文章就介绍到这了,更多相关 Java Word 和 PDF 转图片内容,请搜索脚本之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持脚本之家!
