Java8 SpringBoot 实现 PDF、DOC、PPT 文件转图片功能
Java8 SpringBoot 实现 PDF、DOC、PPT 文件转图片功能
本教程使用 Java 8 和 Spring Boot 实现一个文件上传接口:接收 PDF、DOC、PPT 文件,将其每一页转换为图片,重命名后保存到指定目录。
1. 添加依赖
在 pom.xml 文件中添加以下依赖:
<!-- PDF rendering (PDDocument / PDFRenderer) -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.20</version>
</dependency>
<!-- Apache POI core -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- Apache POI OOXML support (XWPFDocument / XMLSlideShow) -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- Commons IO: the code below calls FileUtils.writeStringToFile and IOUtils.toByteArray -->
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.11.0</version>
</dependency>
2. 编写文件上传接口
使用 SpringMVC 的 @RequestParam 注解接收上传的文件,然后根据文件类型进行处理,将每页转为图片重命名并保存到指定目录。
/**
 * Receives an uploaded file and converts it according to its extension.
 *
 * <ul>
 *   <li>{@code pdf}: every page is rendered at 300 DPI to {@code D:/output/image-N.jpg}.</li>
 *   <li>{@code doc}/{@code docx}: the text is written to {@code D:/output/<name>.txt} and the
 *       document is handed to {@code XWPFDocumentVisitor} / {@code PdfConverter}.</li>
 *   <li>{@code ppt}/{@code pptx}: every slide is rendered to {@code D:/output/image-N.jpg}.</li>
 * </ul>
 * Any other extension is ignored and still answered with {@code "success"}.
 *
 * <p>NOTE(review): {@code XWPFDocument} and {@code XMLSlideShow} are the OOXML readers; legacy
 * binary {@code .doc}/{@code .ppt} uploads are expected to be rejected by them - confirm whether
 * those extensions should stay accepted here.
 *
 * @param file the uploaded multipart file
 * @return the literal string {@code "success"}
 * @throws IOException if the upload cannot be read or an output file cannot be written
 * @throws IllegalArgumentException if the upload carries no original filename
 */
@PostMapping("/upload")
public String upload(@RequestParam("file") MultipartFile file) throws IOException {
    String originalName = file.getOriginalFilename();
    if (originalName == null) {
        throw new IllegalArgumentException("Uploaded file has no original filename");
    }
    // The client controls this name and may send a full path; keep only the last path
    // segment so it can never point outside the output directory.
    int lastSeparator = Math.max(originalName.lastIndexOf('/'), originalName.lastIndexOf('\\'));
    String fileName = originalName.substring(lastSeparator + 1);
    int dot = fileName.lastIndexOf('.');
    String fileType = fileName.substring(dot + 1);
    String baseName = dot >= 0 ? fileName.substring(0, dot) : fileName;

    File outputDir = new File("D:/output");
    if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
        throw new IOException("Cannot create output directory " + outputDir);
    }

    if ("pdf".equalsIgnoreCase(fileType)) {
        renderPdfPages(file, outputDir);
    } else if ("doc".equalsIgnoreCase(fileType) || "docx".equalsIgnoreCase(fileType)) {
        convertWordDocument(file, outputDir, baseName);
    } else if ("ppt".equalsIgnoreCase(fileType) || "pptx".equalsIgnoreCase(fileType)) {
        renderSlides(file, outputDir);
    }
    return "success";
}

/** Renders each PDF page at 300 DPI and writes it as image-N.jpg (N starts at 1). */
private void renderPdfPages(MultipartFile file, File outputDir) throws IOException {
    try (java.io.InputStream in = file.getInputStream();
         PDDocument document = PDDocument.load(in)) {
        PDFRenderer renderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            BufferedImage image = renderer.renderImageWithDPI(i, 300);
            writeJpeg(image, new File(outputDir, "image-" + (i + 1) + ".jpg"));
        }
    }
}

/** Extracts the Word text to a .txt file, then passes the document to the PdfConverter visitor. */
private void convertWordDocument(MultipartFile file, File outputDir, String baseName) throws IOException {
    try (java.io.InputStream in = file.getInputStream();
         XWPFDocument document = new XWPFDocument(in)) {
        XWPFWordExtractor extractor = new XWPFWordExtractor(document);
        String text = extractor.getText();
        File outputFile = new File(outputDir, baseName + ".txt");
        FileUtils.writeStringToFile(outputFile, text, Charset.forName("UTF-8"));
        // NOTE(review): nothing in this method writes page images for Word input; the
        // visitor only receives the document - confirm what PdfConverter should produce.
        XWPFDocumentVisitor visitor = new XWPFDocumentVisitor(new PdfConverter());
        visitor.visitDocument(document);
    }
}

/** Draws each slide at the presentation's page size and writes it as image-N.jpg (N starts at 1). */
private void renderSlides(MultipartFile file, File outputDir) throws IOException {
    try (java.io.InputStream in = file.getInputStream();
         XMLSlideShow slideShow = new XMLSlideShow(in)) {
        java.awt.Dimension pageSize = slideShow.getPageSize();
        int slideCount = slideShow.getSlides().size();
        for (int i = 0; i < slideCount; i++) {
            // RGB (no alpha) because the image is stored as JPEG.
            BufferedImage image =
                    new BufferedImage(pageSize.width, pageSize.height, BufferedImage.TYPE_INT_RGB);
            java.awt.Graphics2D graphics = image.createGraphics();
            try {
                // Start from a white page; a fresh RGB image is black.
                graphics.setPaint(java.awt.Color.WHITE);
                graphics.fillRect(0, 0, pageSize.width, pageSize.height);
                slideShow.getSlides().get(i).draw(graphics);
            } finally {
                graphics.dispose();
            }
            writeJpeg(image, new File(outputDir, "image-" + (i + 1) + ".jpg"));
        }
    }
}

/** Writes the image as JPEG and fails loudly when ImageIO finds no suitable writer. */
private void writeJpeg(BufferedImage image, File target) throws IOException {
    if (!ImageIO.write(image, "jpg", target)) {
        throw new IOException("No JPEG writer available for " + target);
    }
}
3. 实现 Word 文档转 PDF 的 PdfConverter 类
在上面的代码中,我们使用了一个 PdfConverter 类将 Word 文档转为 PDF,这个类继承自 XWPFDefaultVisitor,并重写其中的访问回调方法。
/**
 * Visitor over a Word document that, at the end of the document, renders into an in-memory
 * PDF stream.
 *
 * <p>{@link #visitDocumentStart} allocates a {@link ByteArrayOutputStream} and a nested
 * {@code PdfConverter} delegate; the remaining visit callbacks forward to that delegate and
 * {@link #visitDocumentEnd} asks it to {@link #convert} the document into the stream.
 *
 * <p>NOTE(review): no accessor for the output buffer is visible in this class - confirm how
 * callers are supposed to obtain the generated PDF bytes.
 */
public class PdfConverter extends XWPFDefaultVisitor {
    private PdfOptions options;
    private OutputStream out;
    /** Nested delegate; only set by visitDocumentStart/convert, so it may be null. */
    private PdfConverter converter;

    /** Creates a converter with default {@code PdfOptions}. */
    public PdfConverter() {
        this(new PdfOptions());
    }

    /**
     * Creates a converter using the given options.
     *
     * @param options options forwarded to the nested delegate and to {@link #convert}
     */
    public PdfConverter(PdfOptions options) {
        this.options = options;
    }

    /** Allocates the in-memory output buffer and the nested delegate converter. */
    @Override
    public void visitDocumentStart(XWPFDocument document) throws IOException {
        super.visitDocumentStart(document);
        this.out = new ByteArrayOutputStream();
        this.converter = new PdfConverter(this.options);
    }

    /** Asks the delegate to convert the finished document into the output buffer. */
    @Override
    public void visitDocumentEnd(XWPFDocument document) throws IOException {
        super.visitDocumentEnd(document);
        this.converter.convert(document, this.out, this.options);
    }

    @Override
    public void visitParagraphStart(XWPFParagraph paragraph) throws IOException {
        super.visitParagraphStart(paragraph);
        // The delegate created in visitDocumentStart has no delegate of its own, so an
        // unguarded forward would dereference null one level down.
        if (this.converter != null) {
            this.converter.visitParagraphStart(paragraph);
        }
    }

    @Override
    public void visitParagraphEnd(XWPFParagraph paragraph) throws IOException {
        super.visitParagraphEnd(paragraph);
        if (this.converter != null) {
            this.converter.visitParagraphEnd(paragraph);
        }
    }

    @Override
    public void visitRun(XWPFRun run) throws IOException {
        super.visitRun(run);
        if (this.converter != null) {
            this.converter.visitRun(run);
        }
    }

    @Override
    public void visitTableStart(XWPFTable table) throws IOException {
        super.visitTableStart(table);
        if (this.converter != null) {
            this.converter.visitTableStart(table);
        }
    }

    @Override
    public void visitTableEnd(XWPFTable table) throws IOException {
        super.visitTableEnd(table);
        if (this.converter != null) {
            this.converter.visitTableEnd(table);
        }
    }

    /**
     * Builds a tagged A4 PDF on {@code out}, configures the renderer and flushes it.
     *
     * <p>NOTE(review): the {@code document} parameter is not read anywhere in this method -
     * confirm how the Word content is meant to reach the renderer.
     *
     * @param document the Word document being converted
     * @param out      stream the PDF writer is attached to
     * @param options  options used for the nested converter created at the end
     * @throws IOException if rendering fails
     */
    public void convert(XWPFDocument document, OutputStream out, PdfOptions options) throws IOException {
        PdfDocument pdf = new PdfDocument(new PdfWriter(out));
        pdf.setDefaultPageSize(PageSize.A4);
        pdf.setTagged();
        pdf.getCatalog().setLang(new PdfString("en-US"));

        PdfViewerPreferences viewerPreferences = new PdfViewerPreferences();
        viewerPreferences.setDisplayDocTitle(true);
        viewerPreferences.setFitWindow(true);
        viewerPreferences.setHideToolbar(false);
        viewerPreferences.setHideMenubar(false);
        viewerPreferences.setHideWindowUI(false);
        viewerPreferences.setCenterWindow(true);
        pdf.getCatalog().setViewerPreferences(viewerPreferences);
        pdf.getCatalog().setPageMode(PdfName.UseOutlines);

        Document documentRenderer = new Document(pdf);
        PdfDocumentRenderer pdfDocumentRenderer = new PdfDocumentRenderer(pdf);
        pdfDocumentRenderer.setTagStructure(true);
        // MediaReplacedElementFactory's constructor takes the renderer itself, not its shared context.
        pdfDocumentRenderer.getSharedContext().setReplacedElementFactory(new MediaReplacedElementFactory(pdfDocumentRenderer));
        // No-op style resolver: stores nothing and resolves every selector to null.
        pdfDocumentRenderer.getSharedContext().setCssResolver(new StyleResolver() {
            @Override
            public void addStyle(CssDeclaration declaration) {
            }

            @Override
            public void addStyles(List<CssDeclaration> styles) {
            }

            @Override
            public CssDeclaration getDefaultStyle(String selector) {
                return null;
            }

            @Override
            public CssDeclaration getStyle(String selector) {
                return null;
            }

            @Override
            public void removeStyle(String selector) {
            }
        });
        documentRenderer.setRenderer(pdfDocumentRenderer);
        documentRenderer.setTagFactory(Tags.getHtmlTagProcessorFactory());
        // Images are resolved from the base64 payload that follows the first comma of the URI.
        documentRenderer.setProperty(HtmlRendererContext.IMAGES_PROVIDER, new AbstractRendererBuilder.ImagesProvider() {
            @Override
            public ImageProvider getImageResource(String uri) {
                byte[] data = Base64.getDecoder().decode(uri.substring(uri.indexOf(",") + 1));
                return new ImageProvider(new ByteArrayInputStream(data));
            }
        });
        documentRenderer.setProperty(HtmlRendererContext.RENDERING_MODE, RenderingMode.HTML);
        documentRenderer.setProperty(HtmlRendererContext.CHARACTER_ENCODING, "UTF-8");
        documentRenderer.setProperty(HtmlRendererContext.FONT_PROVIDER, new DefaultFontProvider(false, false, false));
        documentRenderer.setProperty(HtmlRendererContext.CSS_CLASS_PREFIX, "pdf-css-");
        documentRenderer.setProperty(HtmlRendererContext.PDF_RENDERER_CONTEXT, pdfDocumentRenderer.getSharedContext());
        documentRenderer.setProperty(HtmlRendererContext.MEDIA_REPLACEMENT, MediaReplacedElementFactory.DEFAULT_MEDIA_REPLACEMENT);
        documentRenderer.setProperty(HtmlRendererContext.IMG_HANDLING_CONFIG, new ImageHandlingConfig() {
            @Override
            public Float getDeviceViewportSize() {
                return null;
            }

            @Override
            public Float getDevicePixelRatio() {
                return null;
            }

            @Override
            public boolean isDotsPerPixel() {
                return false;
            }

            @Override
            public boolean isImageAllowedOnProcessor(String tag, String attributeName, String attributeValue) {
                return true;
            }
        });
        // Anchor handler that ignores every callback.
        documentRenderer.setProperty(HtmlRendererContext.ANCHOR_HANDLER, new AnchorHandler() {
            @Override
            public void resolveDestination(String id) {
            }

            @Override
            public void processEnd(String name, Map<String, String> attributes) {
            }

            @Override
            public void processStart(String name, Map<String, String> attributes) {
            }
        });
        documentRenderer.setProperty(HtmlRendererContext.CSS_MAP, new HashMap<String, String>());
        documentRenderer.setProperty(HtmlRendererContext.HTML_ATTR_MAP, new HashMap<String, String>());
        documentRenderer.setProperty(HtmlRendererContext.HTML_ATTR_PREFIX, "");
        documentRenderer.setProperty(HtmlRendererContext.TABLE_PROCESSOR, new TableProcessor());

        // Parser settings. Each property is set once; the verbatim repeats of this group
        // (and of FONT_PROVIDER / setDisplayDocTitle above) were removed.
        documentRenderer.setProperty(ParserProperties.STREAM_FACTORY, new StringStreamFactory());
        documentRenderer.setProperty(ParserProperties.ATTRIBUTE_VALUE_PARSER, new AttributeValueParser() {
            @Override
            public String parseAttributeValue(String value) {
                return value;
            }
        });
        documentRenderer.setProperty(ParserProperties.USE_CSS, true);
        documentRenderer.setProperty(ParserProperties.FOREIGN_ATTRIBUTES, new HashMap<String, String>());
        documentRenderer.setProperty(ParserProperties.IS_STRICT, false);
        documentRenderer.setProperty(ParserProperties.REFLOW_CACHING, false);
        documentRenderer.setProperty(ParserProperties.IGNORE_QUESTIONS, true);

        // NOTE(review): a fresh nested converter is created here and handed the layout
        // document rather than the Word document - confirm this is intended.
        this.converter = new PdfConverter(options);
        this.converter.visitDocument(documentRenderer);
        documentRenderer.flush();
    }
}
4. 实现 Word 文档转 PDF 的 MediaReplacedElementFactory 类
在上面的代码中,我们使用了一个 MediaReplacedElementFactory 类将 Word 文档中的图片转为 PDF 中的图片,这个类继承自 AbstractReplacedElementFactory,并重写 createReplacedElement 方法。
/**
 * Replaced-element factory that turns {@code img} elements into PDF form XObjects.
 *
 * <p>The image bytes come either from a base64 {@code data:image/...} URI or from the
 * renderer's user agent. Every other element, and any image that cannot be loaded, is
 * left to the superclass implementation.
 */
public class MediaReplacedElementFactory extends AbstractReplacedElementFactory {
    public static final String DEFAULT_MEDIA_REPLACEMENT = "<div style=\"width:%s;height:%s;background-color:%s\">%s</div>";

    private final PdfDocumentRenderer renderer;

    /**
     * @param renderer renderer whose shared context and PDF document are used to load and
     *                 embed images
     */
    public MediaReplacedElementFactory(PdfDocumentRenderer renderer) {
        this.renderer = renderer;
    }

    /**
     * Creates the replaced element for {@code box}.
     *
     * @param cssWidth  CSS width, or {@code -1} to use the image's own width
     * @param cssHeight CSS height, or {@code -1} to use the image's own height
     * @return an image-backed element for loadable {@code img} elements, otherwise whatever
     *         the superclass returns
     */
    @Override
    public ReplacedElement createReplacedElement(LayoutContext c, BlockBox box, UserAgentCallback uac, int cssWidth, int cssHeight) {
        Element element = box.getElement();
        if (element == null || !"img".equals(element.getNodeName())) {
            return super.createReplacedElement(c, box, uac, cssWidth, cssHeight);
        }
        try {
            byte[] data = readImageBytes(element.getAttribute("src"));
            if (data != null) {
                ImageData imageData = ImageDataFactory.create(data);
                // An explicit CSS size (anything but -1) overrides the intrinsic image size.
                float width = cssWidth != -1 ? cssWidth : imageData.getWidth();
                float height = cssHeight != -1 ? cssHeight : imageData.getHeight();
                Image image = new Image(imageData);
                image.scaleToFit(width, height);
                PdfFormXObject xObject = new PdfFormXObject(new Rectangle(image.getImageScaledWidth(), image.getImageScaledHeight()));
                new PdfCanvas(xObject, this.renderer.getPdfDocument()).addXObject(image.getXObject(), 0, 0);
                return new PdfReplacedElement(xObject, new Dimension(image.getImageScaledWidth(), image.getImageScaledHeight()));
            }
        } catch (IOException e) {
            // Best effort: report the failure and fall back to the default handling below.
            e.printStackTrace();
        }
        return super.createReplacedElement(c, box, uac, cssWidth, cssHeight);
    }

    /**
     * Loads the raw bytes behind an image {@code src} attribute.
     *
     * @return the image bytes, or {@code null} when there is no source or the user agent
     *         yields no stream for it
     * @throws IOException if the stream cannot be read or a data URI is not valid base64
     */
    private byte[] readImageBytes(String srcAttr) throws IOException {
        if (srcAttr == null || srcAttr.isEmpty()) {
            return null;
        }
        if (srcAttr.startsWith("data:image/")) {
            try {
                return Base64.getDecoder().decode(srcAttr.substring(srcAttr.indexOf(",") + 1));
            } catch (IllegalArgumentException e) {
                // Surface malformed payloads through the same path as other load failures.
                throw new IOException("Malformed base64 image data URI", e);
            }
        }
        // try-with-resources tolerates a null resource and closes the stream otherwise.
        try (InputStream is = this.renderer.getSharedContext().getUac().getInputStream(srcAttr)) {
            return is == null ? null : IOUtils.toByteArray(is);
        }
    }

    @Override
    public void reset() {
    }
}
5. 运行程序
启动 SpringBoot 应用,访问 http://localhost:8080/index.html,上传 PDF、DOC、PPT 文件(上传表单需以 POST 方式提交到 /upload 接口),程序会自动把每页转为图片,重命名后保存到指定目录。
原文地址: https://www.cveoy.top/t/topic/mg7c 著作权归作者所有。请勿转载和采集!