Groovy PDFBox 识别二维码:教程和代码示例
PDFBox 本身不具备二维码解码功能,它只能把 PDF 中嵌入的二维码图像提取出来;要读取二维码的内容,还需要把提取出的图像交给 ZXing 等解码库处理。使用 PDFBox 提取二维码图像需要以下步骤:
- 导入相关库文件
// NOTE(review): the FilterManager, FilterOperator and Multiply* imports below do not appear
// to match any class in the PDFBox 2.x org.apache.pdfbox.filter package - verify and remove.
import org.apache.pdfbox.contentstream.operator.Operator
import org.apache.pdfbox.cos.COSBase
import org.apache.pdfbox.cos.COSName
import org.apache.pdfbox.cos.COSStream
import org.apache.pdfbox.filter.Filter
import org.apache.pdfbox.filter.FilterFactory
import org.apache.pdfbox.filter.FilterManager
import org.apache.pdfbox.filter.FilterOperator
import org.apache.pdfbox.filter.MultiplyAll
import org.apache.pdfbox.filter.MultiplyAllNonStroking
import org.apache.pdfbox.filter.MultiplyAllNonStrokingCA
import org.apache.pdfbox.filter.MultiplyAllNonStrokingCMYK
import org.apache.pdfbox.filter.MultiplyAllNonStrokingCS
import org.apache.pdfbox.filter.MultiplyAllNonStrokingGray
import org.apache.pdfbox.filter.MultiplyAllNonStrokingNone
import org.apache.pdfbox.filter.MultiplyAllNonStrokingPattern
import org.apache.pdfbox.filter.MultiplyAllNonStrokingRGB
import org.apache.pdfbox.filter.MultiplyAllStroking
import org.apache.pdfbox.filter.MultiplyAllStrokingCA
import org.apache.pdfbox.filter.MultiplyAllStrokingCMYK
import org.apache.pdfbox.filter.MultiplyAllStrokingCS
import org.apache.pdfbox.filter.MultiplyAllStrokingGray
import org.apache.pdfbox.filter.MultiplyAllStrokingNone
import org.apache.pdfbox.filter.MultiplyAllStrokingPattern
import org.apache.pdfbox.filter.MultiplyAllStrokingRGB
import org.apache.pdfbox.filter.MultiplyFilter
import org.apache.pdfbox.filter.MultiplyNonStrokingCA
import org.apache.pdfbox.filter.MultiplyNonStrokingCMYK
import org.apache.pdfbox.filter.MultiplyNonStrokingCS
import org.apache.pdfbox.filter.MultiplyNonStrokingGray
import org.apache.pdfbox.filter.MultiplyNonStrokingRGB
import org.apache.pdfbox.filter.MultiplyStrokingCA
import org.apache.pdfbox.filter.MultiplyStrokingCMYK
import org.apache.pdfbox.filter.MultiplyStrokingCS
import org.apache.pdfbox.filter.MultiplyStrokingGray
import org.apache.pdfbox.filter.MultiplyStrokingRGB
import org.apache.pdfbox.pdmodel.PDDocument
import org.apache.pdfbox.pdmodel.PDPage
import org.apache.pdfbox.pdmodel.PDResources
import org.apache.pdfbox.pdmodel.common.PDStream
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject
import org.apache.pdfbox.text.PDFTextStripper
import org.apache.pdfbox.tools.imageio.ImageIOUtil
- 读取 PDF 文件
// Open the PDF file that the following steps search for QR code images.
File pdfFile = new File('input.pdf')
PDDocument document = PDDocument.load(pdfFile)
- 获取 PDF 页面
// Take the page at index 0 of the loaded document; the following steps inspect this page.
PDPage page = document.getPage(0)
- 获取页面内容
// Extract the text of the selected page only. getText(document) on an unconfigured
// stripper returns the text of every page, which does not match this "page content" step.
PDFTextStripper stripper = new PDFTextStripper()
// Stripper page numbers are 1-based and inclusive, whereas PDPageTree.indexOf is 0-based.
int pageNumber = document.getPages().indexOf(page) + 1
stripper.setStartPage(pageNumber)
stripper.setEndPage(pageNumber)
String text = stripper.getText(document)
- 遍历页面内容,查找二维码
// Collect the stream filter (compression) names used by the image XObjects on the page.
// An embedded QR code is typically stored as an image XObject in the page resources, so
// the resources are inspected here rather than the content-stream operators.
// Result: `arguments` holds COSName entries such as DCTDecode or FlateDecode.
List<COSBase> arguments = new ArrayList<COSBase>()
PDResources resources = page.getResources()
if (resources != null) {
    for (COSName xObjectName : resources.getXObjectNames()) {
        def xObject = resources.getXObject(xObjectName)
        if (xObject instanceof PDImageXObject) {
            // PDFBox 2.x may return null here when the image stream has no filter at all.
            List<COSName> imageFilters = ((PDImageXObject) xObject).getStream().getFilters()
            if (imageFilters != null) {
                arguments.addAll(imageFilters)
            }
        }
    }
}
- 将二维码提取出来
// Export the first image XObject of the page once per recognised filter name in `arguments`.
// The image is decoded to a BufferedImage first, because ImageIOUtil.writeImage expects a
// BufferedImage rather than the raw stream bytes.
PDImageXObject qrImage = null
PDResources pageResources = page.getResources()
if (pageResources != null) {
    for (COSName xObjectName : pageResources.getXObjectNames()) {
        def candidate = pageResources.getXObject(xObjectName)
        if (candidate instanceof PDImageXObject) {
            qrImage = (PDImageXObject) candidate
            break
        }
    }
}

// Output file per stream filter. JPXDecode, JBIG2Decode and RunLengthDecode are written as
// PNG: the decoded pixels are what gets saved, and the JDK's ImageIO has no writer for
// .jp2, .jb2 or .rle files.
// NOTE(review): decoding JPX/JBIG2 images presumably needs the optional ImageIO plugins
// on the classpath - confirm against the PDFBox dependency documentation.
Map<String, String> outputFileByFilter = [
    'DCTDecode'      : 'output.jpg',
    'JPXDecode'      : 'output.png',
    'CCITTFaxDecode' : 'output.tif',
    'JBIG2Decode'    : 'output.png',
    'RunLengthDecode': 'output.png',
    'FlateDecode'    : 'output.png',
    'LZWDecode'      : 'output.tif'
]

if (qrImage != null) {
    // Several filters map to the same file name; remember what was written so the same
    // image is not encoded and written twice.
    Set<String> writtenFiles = new HashSet<String>()
    for (COSBase argument : arguments) {
        if (argument instanceof COSName) {
            String outputFile = outputFileByFilter[((COSName) argument).getName()]
            if (outputFile != null && writtenFiles.add(outputFile)) {
                ImageIOUtil.writeImage(qrImage.getImage(), outputFile, 300)
            }
        }
    }
}
- 关闭 PDF 文件
// Close the PDDocument that was opened at the start of the script.
document.close()
注意:
- 确保类路径中包含 PDFBox 库;示例中使用的 ImageIOUtil 位于单独的 pdfbox-tools 模块,需要一并引入。
- 将 'input.pdf' 替换为您的 PDF 文件路径。
- 将 'output.jpg' 替换为您的输出图像文件路径。
原文地址: https://www.cveoy.top/t/topic/nDuU 著作权归作者所有。请勿转载和采集!