Java实现Markdown转为PDF的两种方式
作者:进阶的猿猴
文章介绍了两种将Markdown转换为PDF的方法:一是使用商业授权的Spire.Doc框架,操作简单但需要授权;二是使用开源的Markdown到HTML再到PDF的转换流程,需要自行实现转换逻辑,需要的朋友可以参考下
推荐两种方式:
方式一:使用spire.doc框架(非常简单,但需要商业授权)
- (需要商业授权,没有授权不能商用,且生成开头有标记)
- 也有免费版本spire.doc.free(但缺少很多功能)
方式二:markdown -》 html -》pdf(开源,但要自己实现转换)
- 可以将markdown先转为html,再将html转为pdf,从而实现两者的转换
方式一:spire框架实现
引入jar包
<dependency>
<groupId>e-iceblue</groupId>
<artifactId>spire.doc</artifactId>
<version>13.8.7</version>
</dependency>
需要定义一下仓库地址
<repositories>
<repository>
<id>com.e-iceblue</id>
<url>https://repo.e-iceblue.cn/repository/maven-public/</url>
</repository>
</repositories>
定义转换代码
/**
 * Loads a Markdown file with Spire.Doc and saves it as a PDF.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Document document = new Document();
    document.loadFromFile("C:\\Users\\Administrator\\Desktop\\test111\\123.md", FileFormat.Markdown);
    // The document is saved as PDF, so the output file carries a .pdf extension (not .docx).
    document.saveToFile("C:\\Users\\Administrator\\Desktop\\test111\\123.pdf", FileFormat.PDF);
}
成功,非常简单,且可以指定为FileFormat.Docx转为word,但需要商业授权
方式二:markdown -》 html -》pdf
这种方式有很多框架可以实现,可以自由搭配,主要能实现markdown转为html,html转为pdf即可
搭配一:commonmark + openhtmltopdf
- commonmark:将markdown转为html
- openhtmltopdf:将html转为pdf
- jsoup:用来纠正html格式(因为markdown中填写的内容不一定符合XHTML规范,比如在代码块之外直接写了html标签)
<!-- CommonMark -->
<dependency>
<groupId>org.commonmark</groupId>
<artifactId>commonmark</artifactId>
<version>0.18.0</version>
</dependency>
<dependency>
<groupId>org.commonmark</groupId>
<artifactId>commonmark-ext-gfm-tables</artifactId>
<version>0.18.0</version>
</dependency>
<!-- OpenHTMLtoPDF (基于 Flying Saucer,使用 PDFBox 作为 PDF 输出后端) -->
<dependency>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-core</artifactId>
<version>1.0.10</version>
</dependency>
<dependency>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-pdfbox</artifactId>
<version>1.0.10</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.17.2</version>
</dependency>
实现
- 将 Markdown 转换为 HTML 片段
- 将 HTML 中的本地图片嵌入为 Base64 Data URI(这个如果不需要图片的,可以不用加)
- 包装为完整的 HTML 文档(含 CSS 样式,css样式模板)
- 转换为 XHTML 兼容格式(用来保证转化的html格式正确)
- 将 XHTML 渲染为 PDF
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import org.commonmark.ext.gfm.tables.TablesExtension;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Base64;
public class TestPDF {
/**
 * Runs the Markdown -> HTML -> XHTML -> PDF pipeline.
 *
 * @param args optional; {@code args[0]} is the Markdown input path and {@code args[1]} the PDF
 *             output path. When an argument is missing, the sample path below is used instead.
 * @throws Exception if any pipeline step fails
 */
public static void main(String[] args) throws Exception {
    // Fall back to the sample locations so that running without arguments behaves as before.
    String inputPath = args.length > 0
            ? args[0]
            : "C:\\Users\\Administrator\\Desktop\\test111\\456.md";
    String outputPath = args.length > 1
            ? args[1]
            : "C:\\Users\\Administrator\\Desktop\\test111\\456.pdf";

    // 1. Convert the Markdown file to an HTML fragment.
    String htmlFragment = convertMarkdownToHtml(inputPath);
    // 2. Inline local images referenced by the HTML as Base64 data URIs.
    String embeddedHtml = embedLocalImagesAsDataUri(htmlFragment, inputPath);
    // 3. Wrap the fragment in a complete HTML document (with CSS).
    String fullHtml = wrapWithCompleteHtml(embeddedHtml);
    // 4. Re-serialize the document as XHTML.
    String xhtmlContent = convertToXhtml(fullHtml);
    // 5. Render the XHTML to a PDF file.
    renderXhtmlToPdf(xhtmlContent, outputPath);
}
/**
 * Renders XHTML markup to a PDF file through {@code PdfRendererBuilder}.
 *
 * <p>Before rendering, a set of well-known CJK font files is offered to the builder, chosen by
 * the {@code os.name} system property. Each candidate goes through {@code tryRegisterFont}.
 *
 * @param xhtmlContent the XHTML document to render
 * @param outputPath   path of the PDF file to write
 * @throws IOException if the output file cannot be written or rendering fails
 */
private static void renderXhtmlToPdf(String xhtmlContent, String outputPath) throws IOException {
    // Candidate fonts as {path, family} pairs, listed in registration order.
    final String[][] windowsFonts = {
            {"C:/Windows/Fonts/simsun.ttc", "SimSun"},
            {"C:/Windows/Fonts/msyh.ttc", "Microsoft YaHei"},
    };
    final String[][] unixFonts = {
            // Noto Sans CJK
            {"/usr/share/fonts/noto/NotoSansCJK-Regular.ttc", "Noto Sans CJK SC"},
            {"/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", "Noto Sans CJK SC"},
            // WenQuanYi Micro Hei
            {"/usr/share/fonts/truetype/wqy/wqy-microhei.ttc", "WenQuanYi Micro Hei"},
            {"/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc", "WenQuanYi Micro Hei"},
    };
    final String[][] macFonts = {
            {"/System/Library/Fonts/PingFang.ttc", "PingFang SC"},
            {"/System/Library/Fonts/Helvetica.ttc", "Helvetica"},
    };

    try (OutputStream pdfOut = new FileOutputStream(outputPath)) {
        PdfRendererBuilder pdfBuilder = new PdfRendererBuilder();
        pdfBuilder.useFastMode();
        // The second argument is the base URI; none is supplied here.
        pdfBuilder.withHtmlContent(xhtmlContent, null);
        pdfBuilder.toStream(pdfOut);

        String platform = System.getProperty("os.name").toLowerCase();
        boolean onMac = platform.contains("mac");
        if (platform.contains("win")) {
            for (String[] font : windowsFonts) {
                tryRegisterFont(pdfBuilder, font[0], font[1]);
            }
        } else if (platform.contains("nix") || platform.contains("nux") || onMac) {
            for (String[] font : unixFonts) {
                tryRegisterFont(pdfBuilder, font[0], font[1]);
            }
            if (onMac) {
                for (String[] font : macFonts) {
                    tryRegisterFont(pdfBuilder, font[0], font[1]);
                }
            }
        }

        pdfBuilder.run();
    }
    System.out.println("PDF 已生成: " + outputPath);
}
/**
 * Re-serializes an HTML string as XHTML.
 *
 * <p>The input is parsed with Jsoup and written back out using XML syntax, the
 * {@code xhtml} escape mode (so that, for example, {@code &} is emitted as {@code &amp;}) and
 * with pretty-printing turned off.
 *
 * @param html the HTML document to convert
 * @return the document serialized with the settings above
 */
private static String convertToXhtml(String html) {
    Document parsed = Jsoup.parse(html);
    Document.OutputSettings settings = parsed.outputSettings();
    settings.syntax(Document.OutputSettings.Syntax.xml);
    settings.escapeMode(Entities.EscapeMode.xhtml);
    // Pretty-printing is disabled so the serializer does not add extra whitespace.
    settings.prettyPrint(false);
    return parsed.html();
}
/**
 * Reads a Markdown file as UTF-8 and renders it to an HTML fragment with CommonMark,
 * with the GFM tables extension enabled on both the parser and the renderer.
 *
 * @param markdownFilePath path of the Markdown file to read
 * @return the rendered HTML fragment
 * @throws IOException if the file cannot be read
 */
private static String convertMarkdownToHtml(String markdownFilePath) throws IOException {
    byte[] bytes = Files.readAllBytes(Paths.get(markdownFilePath));
    String markdown = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);

    // Files saved by some editors start with a UTF-8 byte order mark. Left in place it becomes
    // the first character of the first line, so a leading "# Title" would no longer start
    // with '#' and would not be treated as a heading.
    if (!markdown.isEmpty() && markdown.charAt(0) == '\uFEFF') {
        markdown = markdown.substring(1);
    }

    Parser parser = Parser.builder()
            .extensions(Arrays.asList(TablesExtension.create()))
            .build();
    HtmlRenderer renderer = HtmlRenderer.builder()
            .extensions(Arrays.asList(TablesExtension.create()))
            .build();
    return renderer.render(parser.parse(markdown));
}
/**
 * Wraps an HTML fragment in a complete HTML document with an inline stylesheet.
 *
 * <p>The stylesheet's font-family lists name the CJK families that this class registers with
 * the PDF builder ("SimSun", "Microsoft YaHei", "Noto Sans CJK SC", "WenQuanYi Micro Hei",
 * "PingFang SC"). A registered font is only selected when the CSS refers to it by family name.
 *
 * @param htmlFragment HTML placed inside {@code <body>}; inserted verbatim
 * @return the complete HTML document
 */
private static String wrapWithCompleteHtml(String htmlFragment) {
    // Family names must match the names used when the fonts are registered for rendering.
    String cjkFonts = "'Noto Sans SC', 'Noto Sans CJK SC', SimSun, 'Microsoft YaHei', "
            + "'PingFang SC', 'WenQuanYi Micro Hei'";
    String css =
            "body {\n" +
            " font-family: " + cjkFonts + ", Arial, sans-serif;\n" +
            " margin: 40px;\n" +
            " line-height: 1.6;\n" +
            " color: #333;\n" +
            " font-size: 12pt;\n" +
            "}\n" +
            "h1, h2, h3, h4, h5, h6 {\n" +
            " color: #2c3e50;\n" +
            " margin-top: 1.2em;\n" +
            " margin-bottom: 0.6em;\n" +
            "}\n" +
            "h1 { font-size: 24pt; }\n" +
            "h2 { font-size: 20pt; }\n" +
            "h3 { font-size: 16pt; }\n" +
            "\n" +
            "p, ul, ol, pre, table {\n" +
            " margin: 0 0 16px 0;\n" +
            "}\n" +
            "\n" +
            "a {\n" +
            " color: #3498db;\n" +
            " text-decoration: none;\n" +
            "}\n" +
            "\n" +
            "strong { font-weight: bold; }\n" +
            "em { font-style: italic; }\n" +
            "del { text-decoration: line-through; }\n" +
            "\n" +
            "/* 表格:自适应 + 边框 */\n" +
            "table {\n" +
            " border-collapse: collapse;\n" +
            " width: 100%;\n" +
            " empty-cells: show;\n" +
            "}\n" +
            "th, td {\n" +
            " border: 1px solid #999;\n" +
            " padding: 8px 12px;\n" +
            " text-align: left;\n" +
            " vertical-align: top;\n" +
            "}\n" +
            "th {\n" +
            " background-color: #f0f0f0;\n" +
            " font-weight: bold;\n" +
            "}\n" +
            "\n" +
            "/* 代码块 */\n" +
            "code {\n" +
            // CJK families follow the monospace ones as a fallback for Chinese text in code.
            " font-family: Consolas, Monaco, monospace, " + cjkFonts + ";\n" +
            " background-color: #f8f8f8;\n" +
            " padding: 2px 4px;\n" +
            " border-radius: 3px;\n" +
            " font-size: 11pt;\n" +
            "}\n" +
            "pre {\n" +
            " background-color: #f8f8f8;\n" +
            " padding: 12px;\n" +
            " border-left: 4px solid #ccc;\n" +
            " overflow: hidden;\n" +
            " white-space: pre-wrap;\n" +
            " font-family: Consolas, Monaco, monospace, " + cjkFonts + ";\n" +
            " font-size: 10pt;\n" +
            "}\n" +
            "\n" +
            "/* 列表 */\n" +
            "ul, ol {\n" +
            " padding-left: 24px;\n" +
            "}\n" +
            "li {\n" +
            " margin-bottom: 6px;\n" +
            "}\n" +
            "\n" +
            "/* 图片(可选) */\n" +
            "img {\n" +
            " max-width: 100%;\n" +
            " height: auto;\n" +
            "}\n";

    // Plain concatenation (no format string), so '%' in the CSS or fragment needs no escaping.
    return "<!DOCTYPE html>\n" +
            "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" +
            "<head>\n" +
            " <meta charset=\"UTF-8\" />\n" +
            " <title>Markdown to PDF</title>\n" +
            " <style type=\"text/css\">\n" +
            css +
            " </style>\n" +
            "</head>\n" +
            "<body>\n" +
            " " + htmlFragment + "\n" +
            "</body>\n" +
            "</html>";
}
/**
 * Registers a font file with the PDF builder when the file exists; otherwise only logs that
 * the file is missing.
 *
 * @param builder        the builder to register the font with
 * @param fontPath       path of the font file
 * @param fontFamilyName family name under which the font is registered
 */
private static void tryRegisterFont(PdfRendererBuilder builder, String fontPath, String fontFamilyName) {
    File candidate = new File(fontPath);
    if (!candidate.exists()) {
        System.out.println("⚠️ 字体未找到: " + fontPath);
        return;
    }
    System.out.println("✅ 注册字体: " + fontFamilyName + " (" + fontPath + ")");
    builder.useFont(candidate, fontFamilyName);
}
/**
 * Replaces the {@code src} of local {@code <img>} elements with Base64 data URIs.
 *
 * <p>Images whose {@code src} is empty, already a {@code data:} URI, or an http(s) URL are left
 * untouched (remote ones with a warning). A {@code src} starting with a drive letter
 * ({@code D:\x} or {@code D:/x}) is used as an absolute path. Any other {@code src} is resolved
 * against the directory of the Markdown file and must stay inside that directory.
 *
 * @param html             HTML fragment to process
 * @param markdownFilePath path of the Markdown file; its directory is the base for relative images
 * @return the inner HTML of the parsed body, with local images inlined
 * @throws IOException              if an image file cannot be read
 * @throws SecurityException        if a relative image path escapes the Markdown directory
 * @throws IllegalArgumentException if the Markdown path has no parent directory
 */
private static String embedLocalImagesAsDataUri(String html, String markdownFilePath) throws IOException {
    // Make the base absolute and normalized: a bare file name has a null parent, and a base
    // such as "./docs" would never be a prefix of the normalized image path.
    Path markdownBasePath = Paths.get(markdownFilePath).toAbsolutePath().normalize().getParent();
    if (markdownBasePath == null) {
        throw new IllegalArgumentException("Markdown path has no parent directory: " + markdownFilePath);
    }

    Document doc = Jsoup.parse(html);
    for (Element img : doc.select("img")) {
        String src = img.attr("src");
        if (src == null || src.trim().isEmpty()) {
            continue;
        }
        if (src.startsWith("data:")) {
            continue; // already inlined
        }
        if (src.startsWith("http://") || src.startsWith("https://")) {
            System.err.println("Warning: Remote image not embedded: " + src);
            continue;
        }

        Path imgPath;
        if (src.matches("^[A-Za-z]:[\\\\/].*")) {
            // Absolute Windows path with either separator; backslashes become forward slashes.
            imgPath = Paths.get(src.replace("\\", "/"));
        } else {
            imgPath = markdownBasePath.resolve(src).normalize();
            // Reject paths such as "../../secret.png" that leave the Markdown directory.
            if (!imgPath.startsWith(markdownBasePath)) {
                throw new SecurityException("Invalid relative image path: " + src);
            }
        }

        // isRegularFile also rejects directories, which readAllBytes could not read.
        if (!Files.isRegularFile(imgPath)) {
            System.err.println("Image not found: " + imgPath);
            continue;
        }

        byte[] imageBytes = Files.readAllBytes(imgPath);
        String mimeType = Files.probeContentType(imgPath);
        if (mimeType == null || !mimeType.startsWith("image/")) {
            mimeType = "image/png"; // default when the type cannot be determined
        }
        String dataUri = "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(imageBytes);
        img.attr("src", dataUri);
    }
    return doc.body().html();
}
}
搭配二:flexmark + flying-saucer-pdf
- flexmark:将markdown转为html
- flying-saucer-pdf:将html转为pdf
- jsoup:用来纠正html格式(因为markdown填写的内容并不是符合条件的,比如代码块外定义标签;依赖坐标见搭配一)
<dependency>
<groupId>com.vladsch.flexmark</groupId>
<artifactId>flexmark-all</artifactId>
<version>0.50.44</version>
</dependency>
<!-- Flying Saucer: HTML to PDF -->
<dependency>
<groupId>org.xhtmlrenderer</groupId>
<artifactId>flying-saucer-pdf</artifactId>
<version>9.1.22</version>
</dependency>
实现
- 将 Markdown 转换为 HTML 片段
- 将 HTML 中的本地图片嵌入为 Base64 Data URI(这个如果不需要图片的,可以不用加)
- 包装为完整的 HTML 文档(含 CSS 样式,css样式模板)
- 转换为 XHTML 兼容格式(用来保证转化的html格式正确)
- 将 XHTML 渲染为 PDF
import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.BaseFont;
import com.vladsch.flexmark.ext.tables.TablesExtension;
import com.vladsch.flexmark.html.HtmlRenderer;
import com.vladsch.flexmark.parser.Parser;
import com.vladsch.flexmark.util.data.MutableDataSet;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Base64;
public class TestPDF {
/**
 * Runs the Markdown -> HTML -> XHTML -> PDF pipeline.
 *
 * @param args optional; {@code args[0]} is the Markdown input path and {@code args[1]} the PDF
 *             output path. When an argument is missing, the sample path below is used instead.
 * @throws Exception if any pipeline step fails
 */
public static void main(String[] args) throws Exception {
    // Fall back to the sample locations so that running without arguments behaves as before.
    String inputPath = args.length > 0
            ? args[0]
            : "C:\\Users\\Administrator\\Desktop\\test111\\456.md";
    String outputPath = args.length > 1
            ? args[1]
            : "C:\\Users\\Administrator\\Desktop\\test111\\456.pdf";

    // 1. Convert the Markdown file to an HTML fragment.
    String htmlFragment = convertMarkdownToHtml(inputPath);
    // 2. Inline local images referenced by the HTML as Base64 data URIs.
    String embeddedHtml = embedLocalImagesAsDataUri(htmlFragment, inputPath);
    // 3. Wrap the fragment in a complete HTML document (with CSS).
    String fullHtml = wrapWithCompleteHtml(embeddedHtml);
    // 4. Re-serialize the document as XHTML.
    String xhtmlContent = convertToXhtml(fullHtml);
    // 5. Render the XHTML to a PDF file.
    renderXhtmlToPdf(xhtmlContent, outputPath);
}
/**
 * Renders XHTML markup to a PDF file with Flying Saucer's {@code ITextRenderer}.
 *
 * <p>Well-known CJK font files for the current operating system (chosen by the
 * {@code os.name} system property) are added to the renderer's font resolver first. Font files
 * that do not exist are skipped, so a missing font no longer aborts the conversion.
 *
 * @param xhtmlContent the XHTML document to render
 * @param outputPath   path of the PDF file to write
 * @throws IOException      if the output file or a font file cannot be read or written
 * @throws RuntimeException wrapping a {@code DocumentException} raised while loading fonts or
 *                          creating the PDF
 */
private static void renderXhtmlToPdf(String xhtmlContent, String outputPath) throws IOException {
    try (OutputStream out = new FileOutputStream(outputPath)) {
        ITextRenderer pdfRenderer = new ITextRenderer();
        ITextFontResolver fontResolver = pdfRenderer.getFontResolver();

        String osName = System.getProperty("os.name").toLowerCase();
        if (osName.contains("win")) {
            addFontIfPresent(fontResolver, "C:/Windows/Fonts/simsun.ttc");
            addFontIfPresent(fontResolver, "C:/Windows/Fonts/msyh.ttc");
        } else if (osName.contains("nix") || osName.contains("nux")) {
            // Distributions install these fonts in different directories; try the common ones.
            addFontIfPresent(fontResolver, "/usr/share/fonts/noto/NotoSansCJK-Regular.ttc");
            addFontIfPresent(fontResolver, "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc");
            addFontIfPresent(fontResolver, "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc");
            addFontIfPresent(fontResolver, "/usr/share/fonts/wenquanyi/wqy-microhei/wqy-microhei.ttc");
        } else if (osName.contains("mac")) {
            addFontIfPresent(fontResolver, "/System/Library/Fonts/PingFang.ttc");
        }

        pdfRenderer.setDocumentFromString(xhtmlContent);
        pdfRenderer.layout();
        pdfRenderer.createPDF(out);
    } catch (DocumentException e) {
        throw new RuntimeException("Failed to render PDF: " + outputPath, e);
    }
    System.out.println("PDF 已生成: " + outputPath);
}

/** Adds the font file to the resolver if it exists; otherwise logs and skips it. */
private static void addFontIfPresent(ITextFontResolver fontResolver, String fontPath)
        throws IOException, DocumentException {
    if (!Files.exists(Paths.get(fontPath))) {
        System.err.println("Font not found, skipped: " + fontPath);
        return;
    }
    fontResolver.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
}
/**
 * Re-serializes an HTML string as XHTML.
 *
 * <p>The input is parsed with Jsoup and written back out using XML syntax, the
 * {@code xhtml} escape mode (so that, for example, {@code &} is emitted as {@code &amp;}) and
 * with pretty-printing turned off.
 *
 * @param html the HTML document to convert
 * @return the document serialized with the settings above
 */
private static String convertToXhtml(String html) {
    Document parsed = Jsoup.parse(html);
    Document.OutputSettings settings = parsed.outputSettings();
    settings.syntax(Document.OutputSettings.Syntax.xml);
    settings.escapeMode(Entities.EscapeMode.xhtml);
    // Pretty-printing is disabled so the serializer does not add extra whitespace.
    settings.prettyPrint(false);
    return parsed.html();
}
/**
 * Reads a Markdown file as UTF-8 and renders it to an HTML fragment with flexmark,
 * with the tables extension registered through the shared options.
 *
 * @param markdownFilePath path of the Markdown file to read
 * @return the rendered HTML fragment
 * @throws IOException if the file cannot be read
 */
private static String convertMarkdownToHtml(String markdownFilePath) throws IOException {
    byte[] bytes = Files.readAllBytes(Paths.get(markdownFilePath));
    String markdown = new String(bytes, java.nio.charset.StandardCharsets.UTF_8);

    // Files saved by some editors start with a UTF-8 byte order mark. Left in place it becomes
    // the first character of the first line, so a leading "# Title" would no longer start
    // with '#' and would not be treated as a heading.
    if (!markdown.isEmpty() && markdown.charAt(0) == '\uFEFF') {
        markdown = markdown.substring(1);
    }

    // The same options object configures both the parser and the renderer.
    MutableDataSet options = new MutableDataSet();
    options.set(Parser.EXTENSIONS, Arrays.asList(TablesExtension.create()));
    Parser parser = Parser.builder(options).build();
    HtmlRenderer renderer = HtmlRenderer.builder(options).build();
    return renderer.render(parser.parse(markdown));
}
/**
 * Wraps an HTML fragment in a complete HTML document with an inline stylesheet.
 *
 * <p>The stylesheet's font-family lists start with the CJK families whose font files this class
 * adds to the renderer ("SimSun", "Microsoft YaHei", "Noto Sans CJK SC", "WenQuanYi Micro Hei",
 * "PingFang SC"). A font added to the renderer is only selected when the CSS refers to it by
 * family name.
 *
 * @param htmlFragment HTML placed inside {@code <body>}; inserted verbatim
 * @return the complete HTML document
 */
private static String wrapWithCompleteHtml(String htmlFragment) {
    // CJK families come first in every list; NOTE(review): this assumes the renderer uses the
    // first family it can resolve rather than falling back per character - confirm.
    String cjkFonts = "'Noto Sans SC', 'Noto Sans CJK SC', SimSun, 'Microsoft YaHei', "
            + "'PingFang SC', 'WenQuanYi Micro Hei'";
    String css =
            "body {\n" +
            " font-family: " + cjkFonts + ", Arial, sans-serif;\n" +
            " margin: 40px;\n" +
            " line-height: 1.6;\n" +
            " color: #333;\n" +
            " font-size: 12pt;\n" +
            "}\n" +
            "h1, h2, h3, h4, h5, h6 {\n" +
            " color: #2c3e50;\n" +
            " margin-top: 1.2em;\n" +
            " margin-bottom: 0.6em;\n" +
            "}\n" +
            "h1 { font-size: 24pt; }\n" +
            "h2 { font-size: 20pt; }\n" +
            "h3 { font-size: 16pt; }\n" +
            "\n" +
            "p, ul, ol, pre, table {\n" +
            " margin: 0 0 16px 0;\n" +
            "}\n" +
            "\n" +
            "a {\n" +
            " color: #3498db;\n" +
            " text-decoration: none;\n" +
            "}\n" +
            "\n" +
            "strong { font-weight: bold; }\n" +
            "em { font-style: italic; }\n" +
            "del { text-decoration: line-through; }\n" +
            "\n" +
            "/* 表格:自适应 + 边框 */\n" +
            "table {\n" +
            " border-collapse: collapse;\n" +
            " width: 100%;\n" +
            " empty-cells: show;\n" +
            "}\n" +
            "th, td {\n" +
            " border: 1px solid #999;\n" +
            " padding: 8px 12px;\n" +
            " text-align: left;\n" +
            " vertical-align: top;\n" +
            "}\n" +
            "th {\n" +
            " background-color: #f0f0f0;\n" +
            " font-weight: bold;\n" +
            "}\n" +
            "\n" +
            "/* 代码块 */\n" +
            "code {\n" +
            " font-family: " + cjkFonts + ", Consolas, Monaco, monospace !important;\n" +
            " background-color: #f8f8f8;\n" +
            " padding: 2px 4px;\n" +
            " border-radius: 3px;\n" +
            " font-size: 11pt;\n" +
            "}\n" +
            "pre {\n" +
            " font-family: " + cjkFonts + ", Consolas, Monaco, monospace !important;\n" +
            " background-color: #f8f8f8;\n" +
            " padding: 12px;\n" +
            " border-left: 4px solid #ccc;\n" +
            " overflow: hidden;\n" +
            " white-space: pre-wrap;\n" +
            " font-size: 10pt;\n" +
            "}\n" +
            "\n" +
            "/* 列表 */\n" +
            "ul, ol {\n" +
            " padding-left: 24px;\n" +
            "}\n" +
            "li {\n" +
            " margin-bottom: 6px;\n" +
            "}\n" +
            "\n" +
            "/* 图片(可选) */\n" +
            "img {\n" +
            " max-width: 100%;\n" +
            " height: auto;\n" +
            "}";

    // Plain concatenation (no format string), so '%' in the CSS or fragment needs no escaping.
    return "<!DOCTYPE html>\n" +
            "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" +
            "<head>\n" +
            " <meta charset=\"UTF-8\" />\n" +
            " <title>Markdown to PDF</title>\n" +
            " <style type=\"text/css\">\n" +
            css +
            " </style>\n" +
            "</head>\n" +
            "<body>\n" +
            " " + htmlFragment + "\n" +
            "</body>\n" +
            "</html>";
}
/**
 * Replaces the {@code src} of local {@code <img>} elements with Base64 data URIs.
 *
 * <p>Images whose {@code src} is empty, already a {@code data:} URI, or an http(s) URL are left
 * untouched (remote ones with a warning). A {@code src} starting with a drive letter
 * ({@code D:\x} or {@code D:/x}) is used as an absolute path. Any other {@code src} is resolved
 * against the directory of the Markdown file and must stay inside that directory.
 *
 * @param html             HTML fragment to process
 * @param markdownFilePath path of the Markdown file; its directory is the base for relative images
 * @return the inner HTML of the parsed body, with local images inlined
 * @throws IOException              if an image file cannot be read
 * @throws SecurityException        if a relative image path escapes the Markdown directory
 * @throws IllegalArgumentException if the Markdown path has no parent directory
 */
private static String embedLocalImagesAsDataUri(String html, String markdownFilePath) throws IOException {
    // Make the base absolute and normalized: a bare file name has a null parent, and a base
    // such as "./docs" would never be a prefix of the normalized image path.
    Path markdownBasePath = Paths.get(markdownFilePath).toAbsolutePath().normalize().getParent();
    if (markdownBasePath == null) {
        throw new IllegalArgumentException("Markdown path has no parent directory: " + markdownFilePath);
    }

    Document doc = Jsoup.parse(html);
    for (Element img : doc.select("img")) {
        String src = img.attr("src");
        if (src == null || src.trim().isEmpty()) {
            continue;
        }
        if (src.startsWith("data:")) {
            continue; // already inlined
        }
        if (src.startsWith("http://") || src.startsWith("https://")) {
            System.err.println("Warning: Remote image not embedded: " + src);
            continue;
        }

        Path imgPath;
        if (src.matches("^[A-Za-z]:[\\\\/].*")) {
            // Absolute Windows path with either separator; backslashes become forward slashes.
            imgPath = Paths.get(src.replace("\\", "/"));
        } else {
            imgPath = markdownBasePath.resolve(src).normalize();
            // Reject paths such as "../../secret.png" that leave the Markdown directory.
            if (!imgPath.startsWith(markdownBasePath)) {
                throw new SecurityException("Invalid relative image path: " + src);
            }
        }

        // isRegularFile also rejects directories, which readAllBytes could not read.
        if (!Files.isRegularFile(imgPath)) {
            System.err.println("Image not found: " + imgPath);
            continue;
        }

        byte[] imageBytes = Files.readAllBytes(imgPath);
        String mimeType = Files.probeContentType(imgPath);
        if (mimeType == null || !mimeType.startsWith("image/")) {
            mimeType = "image/png"; // default when the type cannot be determined
        }
        String dataUri = "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(imageBytes);
        img.attr("src", dataUri);
    }
    return doc.body().html();
}
}
以上就是Java实现Markdown转为PDF的两种方式的详细内容,更多关于Java Markdown转为PDF的资料请关注脚本之家其它相关文章!
