# Split every PDF found in ``input_folder`` into one UTF-8 text file per page,
# written to ``output_folder``.
#
# NOTE: this script uses ``glob``, ``os`` and ``PyPDF2``; they are expected to
# be imported at the top of the file.

# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\南" (a SyntaxWarning on modern
# Python). The resulting path values are unchanged.
input_folder = r"D:\南巢\第六学期\大数据与会计分析\贵州茅台财务分析\年报"
output_folder = r"D:\南巢\第六学期\大数据与会计分析\贵州茅台财务分析\年报"


def page_output_name(pdf_file, page_number):
    """Return the ``.txt`` file name for one page of ``pdf_file``.

    The name is the PDF's base name with every "年报" replaced by
    "第<page_number>页". If the base name does not contain "年报", the page
    label is appended instead, so that different pages of the same PDF never
    map to the same output file.

    Args:
        pdf_file: Path of the source PDF.
        page_number: 1-based page number.

    Returns:
        The output file name (no directory component).
    """
    stem = os.path.splitext(os.path.basename(pdf_file))[0]
    page_label = "第{}页".format(page_number)
    if "年报" in stem:
        return stem.replace("年报", page_label) + ".txt"
    # Without the marker, replace() would be a no-op and every page would
    # overwrite the same file; append the label to keep the names distinct.
    return stem + page_label + ".txt"


for pdf_file in glob.glob(os.path.join(input_folder, "*.pdf")):
    with open(pdf_file, "rb") as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream)
        for page_number, page in enumerate(pdf_reader.pages, start=1):
            # extract_text() already returns the page as a single string;
            # joining it with "\n" would put every character on its own line.
            text = page.extract_text() or ""
            output_path = os.path.join(
                output_folder, page_output_name(pdf_file, page_number)
            )
            # A distinct name avoids shadowing the still-open PDF handle.
            with open(output_path, "w", encoding="utf-8") as text_file:
                text_file.write(text)


# 原文地址: https://www.cveoy.top/t/topic/hp5u 著作权归作者所有。请勿转载和采集!

# 免费AI点我,无需注册和登录