Java8 SpringBoot 实现 PDF、DOC、PPT 文件转图片功能

本教程将使用 Java8 和 SpringBoot 实现一个文件上传接口:接收 PDF、DOC、PPT 文件,将其每一页转换为图片,重命名后保存到指定目录。

1. 添加依赖

在 pom.xml 文件中添加以下依赖:

<!-- Apache PDFBox: presumably supplies the PDDocument / PDFRenderer classes used by the PDF branch of the upload endpoint. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.20</version>
</dependency>
<!-- Apache POI core library; kept at the same version as poi-ooxml below. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- Apache POI OOXML: presumably supplies the XWPFDocument / XMLSlideShow classes used for Word and PowerPoint files. -->
<!-- NOTE(review): the code that follows also uses FileUtils and several PDF-layout classes that none of these
     three artifacts appear to provide; confirm which additional dependencies are required. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>

2. 编写文件上传接口

使用 SpringMVC 的 @RequestParam 注解接收上传的文件,然后根据文件类型进行处理,将每页转为图片重命名并保存到指定目录。

/** Directory that receives every generated file. */
private static final String OUTPUT_DIR = "D:/output/";

/**
 * Converts an uploaded PDF, Word or PowerPoint file into files under {@code D:/output/}.
 *
 * <p>PDF pages and presentation slides are written as {@code image-N.jpg} (N is 1-based).
 * For Word documents the text is extracted to {@code <name>.txt} and the document is then
 * handed to the visitor wrapping {@code PdfConverter}.
 *
 * @param file the multipart upload bound to the {@code file} request parameter
 * @return the literal {@code "success"} once the file has been processed
 * @throws IOException if the upload cannot be read or an output file cannot be written
 * @throws IllegalArgumentException if the upload has no usable file name or its extension
 *         is not one of pdf, doc, docx, ppt, pptx
 */
@PostMapping("/upload")
public String upload(@RequestParam("file") MultipartFile file) throws IOException {
    String fileName = safeFileName(file.getOriginalFilename());
    int dot = fileName.lastIndexOf('.');
    // Without a dot there is no extension to dispatch on; treat it as unsupported below.
    String fileType = dot < 0 ? "" : fileName.substring(dot + 1);
    String baseName = dot < 0 ? fileName : fileName.substring(0, dot);

    File outputDir = new File(OUTPUT_DIR);
    if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
        throw new IOException("Cannot create output directory " + outputDir);
    }

    if ("pdf".equalsIgnoreCase(fileType)) {
        convertPdf(file);
    } else if ("doc".equalsIgnoreCase(fileType) || "docx".equalsIgnoreCase(fileType)) {
        convertWord(file, baseName);
    } else if ("ppt".equalsIgnoreCase(fileType) || "pptx".equalsIgnoreCase(fileType)) {
        convertSlides(file);
    } else {
        // Previously an unknown type fell through and still reported "success".
        throw new IllegalArgumentException("Unsupported file type: " + fileType);
    }

    return "success";
}

/**
 * Reduces the client-supplied name to its last path segment so it cannot point outside
 * the output directory (clients may send a full path or a crafted "../" sequence).
 */
private static String safeFileName(String originalName) {
    if (originalName == null) {
        throw new IllegalArgumentException("Uploaded file has no name");
    }
    int lastSeparator = Math.max(originalName.lastIndexOf('/'), originalName.lastIndexOf('\\'));
    String name = originalName.substring(lastSeparator + 1);
    if (name.isEmpty()) {
        throw new IllegalArgumentException("Uploaded file has no name");
    }
    return name;
}

/** Renders every PDF page at 300 DPI and stores it as image-N.jpg. */
private static void convertPdf(MultipartFile file) throws IOException {
    try (java.io.InputStream in = file.getInputStream();
         PDDocument document = PDDocument.load(in)) {
        PDFRenderer renderer = new PDFRenderer(document);
        int pageCount = document.getNumberOfPages();
        for (int i = 0; i < pageCount; i++) {
            writeJpeg(renderer.renderImageWithDPI(i, 300), i + 1);
        }
    }
}

/** Extracts the document text to a .txt file, then runs the document through the visitor. */
private static void convertWord(MultipartFile file, String baseName) throws IOException {
    // NOTE(review): XWPFDocument is POI's OOXML (.docx) reader; a legacy binary .doc upload is
    // expected to be rejected by it. Confirm and add a dedicated reader if .doc must work.
    try (java.io.InputStream in = file.getInputStream();
         XWPFDocument document = new XWPFDocument(in)) {
        XWPFWordExtractor extractor = new XWPFWordExtractor(document);
        String text = extractor.getText();
        File outputFile = new File(OUTPUT_DIR + baseName + ".txt");
        FileUtils.writeStringToFile(outputFile, text, Charset.forName("UTF-8"));
        // NOTE(review): XWPFDocumentVisitor is not defined in this file, and nothing here writes
        // page images for Word input. Verify this step actually produces the intended output.
        XWPFDocumentVisitor visitor = new XWPFDocumentVisitor(new PdfConverter());
        visitor.visitDocument(document);
    }
}

/** Draws every slide at its native page size and stores it as image-N.jpg. */
private static void convertSlides(MultipartFile file) throws IOException {
    // NOTE(review): XMLSlideShow is POI's OOXML (.pptx) reader; a legacy binary .ppt upload is
    // expected to be rejected by it. Confirm and add a dedicated reader if .ppt must work.
    try (java.io.InputStream in = file.getInputStream();
         XMLSlideShow slideShow = new XMLSlideShow(in)) {
        java.awt.Dimension pageSize = slideShow.getPageSize();
        int slideCount = slideShow.getSlides().size();
        for (int i = 0; i < slideCount; i++) {
            // TYPE_INT_RGB: the JPEG writer cannot encode an alpha channel.
            BufferedImage image =
                    new BufferedImage(pageSize.width, pageSize.height, BufferedImage.TYPE_INT_RGB);
            java.awt.Graphics2D graphics = image.createGraphics();
            try {
                // Slides without a background would otherwise render on black.
                graphics.setPaint(java.awt.Color.WHITE);
                graphics.fillRect(0, 0, pageSize.width, pageSize.height);
                slideShow.getSlides().get(i).draw(graphics);
            } finally {
                graphics.dispose();
            }
            writeJpeg(image, i + 1);
        }
    }
}

/** Writes one page image and fails loudly when no JPEG encoder accepted it. */
private static void writeJpeg(BufferedImage image, int pageNumber) throws IOException {
    File outputFile = new File(OUTPUT_DIR + "image-" + pageNumber + ".jpg");
    if (!ImageIO.write(image, "jpg", outputFile)) {
        throw new IOException("No JPEG writer could encode " + outputFile);
    }
}

3. 实现 Word 文档转 PDF 的 PdfConverter 类

在上面的代码中,我们使用了一个 PdfConverter 类将 Word 文档转为 PDF,这个类继承自 XWPFDefaultVisitor。

/**
 * Visitor-style converter intended to turn a Word document ({@code XWPFDocument}) into a PDF.
 *
 * <p>Each visit callback first invokes the superclass implementation and then forwards the same
 * call to a nested {@code PdfConverter} held in the {@code converter} field. {@code convert}
 * builds a PDF document and a renderer and sets a long list of renderer properties on them.
 *
 * <p>NOTE(review): the PDF is written to the in-memory stream stored in {@code out}, and nothing
 * in this class exposes that stream, so the result cannot be retrieved as written.
 *
 * <p>NOTE(review): the nested delegate is created through the options-only constructor, which
 * leaves the delegate's own {@code converter} field unset. Forwarding a paragraph/run/table
 * callback to it will therefore dereference {@code null} inside the delegate.
 */
public class PdfConverter extends XWPFDefaultVisitor {

    // Conversion options shared with every nested converter created by this instance.
    private PdfOptions options;
    // Destination of the generated PDF; assigned in visitDocumentStart.
    private OutputStream out;
    // Nested converter that receives the forwarded visitor callbacks.
    private PdfConverter converter;

    /** Creates a converter with a freshly constructed {@code PdfOptions}. */
    public PdfConverter() {
        this(new PdfOptions());
    }

    /**
     * Creates a converter that uses the given options.
     *
     * @param options conversion options; stored as-is
     */
    public PdfConverter(PdfOptions options) {
        this.options = options;
    }

    /** Allocates the in-memory output buffer and a fresh nested converter with the same options. */
    @Override
    public void visitDocumentStart(XWPFDocument document) throws IOException {
        super.visitDocumentStart(document);
        this.out = new ByteArrayOutputStream();
        this.converter = new PdfConverter(this.options);
    }

    /** Asks the nested converter to run {@code convert} against this instance's output buffer. */
    @Override
    public void visitDocumentEnd(XWPFDocument document) throws IOException {
        super.visitDocumentEnd(document);
        this.converter.convert(document, this.out, this.options);
    }

    /** Forwards the paragraph-start callback to the nested converter. */
    @Override
    public void visitParagraphStart(XWPFParagraph paragraph) throws IOException {
        super.visitParagraphStart(paragraph);
        this.converter.visitParagraphStart(paragraph);
    }

    /** Forwards the paragraph-end callback to the nested converter. */
    @Override
    public void visitParagraphEnd(XWPFParagraph paragraph) throws IOException {
        super.visitParagraphEnd(paragraph);
        this.converter.visitParagraphEnd(paragraph);
    }

    /** Forwards the run callback to the nested converter. */
    @Override
    public void visitRun(XWPFRun run) throws IOException {
        super.visitRun(run);
        this.converter.visitRun(run);
    }

    /** Forwards the table-start callback to the nested converter. */
    @Override
    public void visitTableStart(XWPFTable table) throws IOException {
        super.visitTableStart(table);
        this.converter.visitTableStart(table);
    }

    /** Forwards the table-end callback to the nested converter. */
    @Override
    public void visitTableEnd(XWPFTable table) throws IOException {
        super.visitTableEnd(table);
        this.converter.visitTableEnd(table);
    }

    /**
     * Builds a tagged A4 PDF document on {@code out}, configures a renderer for it, and then
     * runs a newly created nested converter over the layout document.
     *
     * <p>NOTE(review): the {@code document} parameter is never read in this method; the Word
     * content does not visibly reach the PDF from here.
     *
     * @param document the Word document to convert (currently unused by the body)
     * @param out stream the PDF writer is attached to
     * @param options options passed to the nested converter created at the end
     * @throws IOException declared for the underlying PDF and visitor calls
     */
    public void convert(XWPFDocument document, OutputStream out, PdfOptions options) throws IOException {
        // Document-level setup: A4 pages, tagged structure, en-US language.
        PdfDocument pdf = new PdfDocument(new PdfWriter(out));
        pdf.setDefaultPageSize(PageSize.A4);
        pdf.setTagged();
        pdf.getCatalog().setLang(new PdfString("en-US"));
        // Viewer preferences. setDisplayDocTitle(true) is called twice; the second call is redundant.
        PdfViewerPreferences viewerPreferences = new PdfViewerPreferences();
        viewerPreferences.setDisplayDocTitle(true);
        viewerPreferences.setFitWindow(true);
        viewerPreferences.setHideToolbar(false);
        viewerPreferences.setHideMenubar(false);
        viewerPreferences.setHideWindowUI(false);
        viewerPreferences.setCenterWindow(true);
        viewerPreferences.setDisplayDocTitle(true);
        pdf.getCatalog().setViewerPreferences(viewerPreferences);
        pdf.getCatalog().setPageMode(PdfName.UseOutlines);
        Document documentRenderer = new Document(pdf);
        PdfDocumentRenderer pdfDocumentRenderer = new PdfDocumentRenderer(pdf);
        pdfDocumentRenderer.setTagStructure(true);
        // NOTE(review): this passes the shared context, while the MediaReplacedElementFactory
        // constructor in this file takes a PdfDocumentRenderer — confirm the intended argument.
        pdfDocumentRenderer.getSharedContext().setReplacedElementFactory(new MediaReplacedElementFactory(pdfDocumentRenderer.getSharedContext()));
        // No-op style resolver: ignores every added style and returns null for every lookup.
        pdfDocumentRenderer.getSharedContext().setCssResolver(new StyleResolver() {
            @Override
            public void addStyle(CssDeclaration declaration) {
            }

            @Override
            public void addStyles(List<CssDeclaration> styles) {
            }

            @Override
            public CssDeclaration getDefaultStyle(String selector) {
                return null;
            }

            @Override
            public CssDeclaration getStyle(String selector) {
                return null;
            }

            @Override
            public void removeStyle(String selector) {
            }
        });
        documentRenderer.setRenderer(pdfDocumentRenderer);
        documentRenderer.setTagFactory(Tags.getHtmlTagProcessorFactory());
        // Image provider that decodes the Base64 payload following the first comma of the URI.
        // If the URI has no comma, indexOf returns -1 and the whole URI is passed to the decoder.
        documentRenderer.setProperty(HtmlRendererContext.IMAGES_PROVIDER, new AbstractRendererBuilder.ImagesProvider() {
            @Override
            public ImageProvider getImageResource(String uri) {
                byte[] data = Base64.getDecoder().decode(uri.substring(uri.indexOf(",") + 1));
                return new ImageProvider(new ByteArrayInputStream(data));
            }
        });
        documentRenderer.setProperty(HtmlRendererContext.RENDERING_MODE, RenderingMode.HTML);
        documentRenderer.setProperty(HtmlRendererContext.CHARACTER_ENCODING, "UTF-8");
        documentRenderer.setProperty(HtmlRendererContext.FONT_PROVIDER, new DefaultFontProvider(false, false, false));
        documentRenderer.setProperty(HtmlRendererContext.CSS_CLASS_PREFIX, "pdf-css-");
        documentRenderer.setProperty(HtmlRendererContext.PDF_RENDERER_CONTEXT, pdfDocumentRenderer.getSharedContext());
        documentRenderer.setProperty(HtmlRendererContext.MEDIA_REPLACEMENT, MediaReplacedElementFactory.DEFAULT_MEDIA_REPLACEMENT);
        // Image handling config: no viewport size or pixel ratio, every image allowed.
        documentRenderer.setProperty(HtmlRendererContext.IMG_HANDLING_CONFIG, new ImageHandlingConfig() {
            @Override
            public Float getDeviceViewportSize() {
                return null;
            }

            @Override
            public Float getDevicePixelRatio() {
                return null;
            }

            @Override
            public boolean isDotsPerPixel() {
                return false;
            }

            @Override
            public boolean isImageAllowedOnProcessor(String tag, String attributeName, String attributeValue) {
                return true;
            }
        });
        // Anchor handler whose callbacks all do nothing.
        documentRenderer.setProperty(HtmlRendererContext.ANCHOR_HANDLER, new AnchorHandler() {
            @Override
            public void resolveDestination(String id) {
            }

            @Override
            public void processEnd(String name, Map<String, String> attributes) {
            }

            @Override
            public void processStart(String name, Map<String, String> attributes) {
            }
        });
        documentRenderer.setProperty(HtmlRendererContext.CSS_MAP, new HashMap<String, String>());
        documentRenderer.setProperty(HtmlRendererContext.HTML_ATTR_MAP, new HashMap<String, String>());
        documentRenderer.setProperty(HtmlRendererContext.HTML_ATTR_PREFIX, "");
        documentRenderer.setProperty(HtmlRendererContext.TABLE_PROCESSOR, new TableProcessor());
        // FONT_PROVIDER is set a second time here with an equivalent new provider.
        documentRenderer.setProperty(HtmlRendererContext.FONT_PROVIDER, new DefaultFontProvider(false, false, false));
        // Parser properties; the attribute-value parser returns its input unchanged.
        documentRenderer.setProperty(ParserProperties.STREAM_FACTORY, new StringStreamFactory());
        documentRenderer.setProperty(ParserProperties.ATTRIBUTE_VALUE_PARSER, new AttributeValueParser() {
            @Override
            public String parseAttributeValue(String value) {
                return value;
            }
        });
        documentRenderer.setProperty(ParserProperties.USE_CSS, true);
        documentRenderer.setProperty(ParserProperties.FOREIGN_ATTRIBUTES, new HashMap<String, String>());
        documentRenderer.setProperty(ParserProperties.IS_STRICT, false);
        documentRenderer.setProperty(ParserProperties.REFLOW_CACHING, false);
        documentRenderer.setProperty(ParserProperties.IGNORE_QUESTIONS, true);
        // NOTE(review): the seven parser properties above are set again below with the same
        // values; the repetition looks accidental — confirm before removing.
        documentRenderer.setProperty(ParserProperties.STREAM_FACTORY, new StringStreamFactory());
        documentRenderer.setProperty(ParserProperties.ATTRIBUTE_VALUE_PARSER, new AttributeValueParser() {
            @Override
            public String parseAttributeValue(String value) {
                return value;
            }
        });
        documentRenderer.setProperty(ParserProperties.USE_CSS, true);
        documentRenderer.setProperty(ParserProperties.FOREIGN_ATTRIBUTES, new HashMap<String, String>());
        documentRenderer.setProperty(ParserProperties.IS_STRICT, false);
        documentRenderer.setProperty(ParserProperties.REFLOW_CACHING, false);
        documentRenderer.setProperty(ParserProperties.IGNORE_QUESTIONS, true);
        // Replaces the nested converter and hands it the layout Document (not the XWPFDocument).
        // visitDocument is not declared in this class; presumably inherited — verify it accepts this type.
        this.converter = new PdfConverter(options);
        this.converter.visitDocument(documentRenderer);
        documentRenderer.flush();
    }
}

4. 实现 Word 文档转 PDF 的 MediaReplacedElementFactory 类

在上面的代码中,我们使用了一个 MediaReplacedElementFactory 类将 Word 文档中的图片转为 PDF 中的图片,这个类继承自 AbstractReplacedElementFactory。

/**
 * Replaced-element factory that turns {@code <img>} elements into PDF form XObjects.
 *
 * <p>Image bytes come either from an inline {@code data:image/...} URI (Base64 payload after the
 * first comma) or from the renderer's user agent. Any element that is not an {@code img}, has no
 * usable {@code src}, or whose image cannot be loaded is delegated to the superclass.
 */
public class MediaReplacedElementFactory extends AbstractReplacedElementFactory {

    public static final String DEFAULT_MEDIA_REPLACEMENT = "<div style=\"width:%s;height:%s;background-color:%s\">%s</div>";

    private static final java.util.logging.Logger LOGGER =
            java.util.logging.Logger.getLogger(MediaReplacedElementFactory.class.getName());

    private final PdfDocumentRenderer renderer;

    /**
     * @param renderer renderer whose shared context and PDF document are used to load and draw images
     */
    public MediaReplacedElementFactory(PdfDocumentRenderer renderer) {
        this.renderer = renderer;
    }

    /**
     * Builds a PDF replaced element for an {@code img} box, scaled to the CSS size when one is given.
     *
     * @param c layout context, passed through to the superclass on fallback
     * @param box box whose element is inspected
     * @param uac user agent callback, passed through to the superclass on fallback
     * @param cssWidth CSS width, or {@code -1} to keep the image's own width
     * @param cssHeight CSS height, or {@code -1} to keep the image's own height
     * @return the image element, or whatever the superclass produces when no image could be built
     */
    @Override
    public ReplacedElement createReplacedElement(LayoutContext c, BlockBox box, UserAgentCallback uac, int cssWidth, int cssHeight) {
        Element element = box.getElement();
        // Constant-first equals: a null node name simply fails the test instead of throwing.
        if (element != null && "img".equals(element.getNodeName())) {
            try {
                byte[] data = loadImageBytes(element.getAttribute("src"));
                if (data != null) {
                    ImageData imageData = ImageDataFactory.create(data);
                    float width = cssWidth != -1 ? cssWidth : imageData.getWidth();
                    float height = cssHeight != -1 ? cssHeight : imageData.getHeight();
                    Image image = new Image(imageData);
                    image.scaleToFit(width, height);
                    PdfFormXObject xObject = new PdfFormXObject(new Rectangle(image.getImageScaledWidth(), image.getImageScaledHeight()));
                    new PdfCanvas(xObject, this.renderer.getPdfDocument()).addXObject(image.getXObject(), 0, 0);
                    return new PdfReplacedElement(xObject, new Dimension(image.getImageScaledWidth(), image.getImageScaledHeight()));
                }
            } catch (IOException e) {
                // Best effort: a broken image must not abort the whole conversion, so log and
                // fall through. The src value is not logged because data URIs can be very large.
                LOGGER.log(java.util.logging.Level.WARNING,
                        "Could not load image for <img> element; using default replaced element", e);
            }
        }
        return super.createReplacedElement(c, box, uac, cssWidth, cssHeight);
    }

    /**
     * Resolves the raw bytes behind an {@code src} attribute.
     *
     * @return the image bytes, or {@code null} when there is no source or no stream for it
     * @throws IOException if the stream cannot be read or a data URI is malformed
     */
    private byte[] loadImageBytes(String src) throws IOException {
        if (src == null || src.isEmpty()) {
            return null;
        }
        if (src.startsWith("data:image/")) {
            int comma = src.indexOf(',');
            if (comma < 0) {
                throw new IOException("Data URI has no ',' separating header and payload");
            }
            try {
                return Base64.getDecoder().decode(src.substring(comma + 1));
            } catch (IllegalArgumentException e) {
                // Surface bad Base64 as an I/O problem so the caller's single fallback path handles it.
                throw new IOException("Data URI payload is not valid Base64", e);
            }
        }
        // try-with-resources: the stream was previously never closed.
        try (InputStream is = this.renderer.getSharedContext().getUac().getInputStream(src)) {
            return is == null ? null : IOUtils.toByteArray(is);
        }
    }

    /** No per-document state is kept, so there is nothing to reset. */
    @Override
    public void reset() {
    }
}

5. 运行程序

启动 SpringBoot 应用,访问 http://localhost:8080/index.html,上传 PDF、DOC、PPT 文件,程序会自动把每一页转为图片,重命名后保存到指定目录。

Java8 SpringBoot 实现 PDF、DOC、PPT 文件转图片功能

原文地址: https://www.cveoy.top/t/topic/mg7c 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录