Python CSV 文件复制工具:高效处理大文件
@resource_reference{'/dataset/ods/df_orders.csv'}
import os
from datetime import datetime


def copy_csv(input_file_name: str, output_file_name: str,
             progress_every: int = 100000) -> int:
    """Copy a CSV file line by line and report progress on stdout.

    The first line (the header row) is copied first and is not counted.
    Every following line is streamed to the output one at a time, so the
    whole file is never held in memory.

    Args:
        input_file_name: Path of the CSV file to read.
        output_file_name: Path of the file to create or overwrite.
        progress_every: Print a progress line each time this many data
            lines have been written. A value of 0 disables the periodic
            progress lines (the final summary line is still printed).

    Returns:
        The number of data lines copied, excluding the header line.

    Raises:
        OSError: If either file cannot be opened, read, or written.
    """
    # newline='' turns off newline translation on both sides, so the
    # original line endings (\n, \r\n, \r) are written back unchanged.
    with open(output_file_name, 'w', newline='') as output_file, \
            open(input_file_name, 'r', newline='') as input_file:
        # Copy the header row first; it is not included in the count.
        line = input_file.readline()
        output_file.write(line)

        # Stream the remaining lines one at a time.
        k = 0
        line = input_file.readline()
        while line:
            k += 1
            output_file.write(line)
            line = input_file.readline()
            # Periodic progress: timestamp, lines written so far, and the
            # line that has just been read (it is written next iteration).
            if progress_every and 0 == k % progress_every:
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), k, line, flush=True)
        # Final summary; `line` is the empty string here (end of file).
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), k, line, flush=True)
    return k


if __name__ == '__main__':
    # Input and output file names.
    input_file_name = 'dataset/ods/df_orders.csv'
    # NOTE(review): '${bdp.system.savingModelPath}' is presumably a
    # placeholder substituted by the hosting platform before the script
    # runs - confirm; as plain Python it is just a literal path prefix.
    output_file_name = '${bdp.system.savingModelPath}' + 'df_orders_1g.csv'

    copy_csv(input_file_name, output_file_name)

    # Report both file sizes in MiB, after the files have been closed so
    # the output size reflects everything that was written.
    print(os.path.getsize(input_file_name)/1024/1024, 'M')
    print(os.path.getsize(output_file_name)/1024/1024, 'M')
原文地址: https://www.cveoy.top/t/topic/oe1q 著作权归作者所有。请勿转载和采集!