@resource_reference{'/dataset/ods/df_orders.csv'}

import os
from datetime import datetime

def copy_csv(input_file_name, output_file_name, progress_every=100000):
    """Copy a text file line by line, printing progress along the way.

    The first line (the column header) is copied first; every following
    line is then streamed to the output one at a time, so the whole file
    is never held in memory.

    Args:
        input_file_name: Path of the file to read.
        output_file_name: Path of the file to create or overwrite.
        progress_every: Print a progress line after this many copied rows.
            A falsy value disables the periodic progress output.

    Returns:
        The number of lines copied after the header line.

    Raises:
        OSError: If either file cannot be opened, read, or written.
    """
    copied = 0
    last_line = ''
    # newline='' turns off newline translation on both sides, so the
    # original line endings are written back unchanged.
    with open(input_file_name, 'r', newline='') as input_file, \
            open(output_file_name, 'w', newline='') as output_file:
        # Copy the first line (column names); this is a no-op on an empty file.
        output_file.write(input_file.readline())

        # Stream the remaining lines from input to output.
        for last_line in input_file:
            output_file.write(last_line)
            copied += 1

            # Report progress every `progress_every` rows, showing the row
            # that was just written.
            if progress_every and copied % progress_every == 0:
                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                      copied, last_line.rstrip('\r\n'), flush=True)

        # Final summary: total row count and the last row copied.
        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
              copied, last_line.rstrip('\r\n'), flush=True)
    return copied


if __name__ == '__main__':
    # Input and output file names.
    input_file_name = 'dataset/ods/df_orders.csv'
    # NOTE(review): '${bdp.system.savingModelPath}' looks like a placeholder
    # substituted by the hosting platform before execution; confirm.
    output_file_name = '${bdp.system.savingModelPath}' + 'df_orders_1g.csv'

    copy_csv(input_file_name, output_file_name)

    # Print the sizes of the input and output files (in MB).
    print(os.path.getsize(input_file_name)/1024/1024, 'M')
    print(os.path.getsize(output_file_name)/1024/1024, 'M')
# Python CSV 文件复制工具:高效处理大文件

# 原文地址: https://www.cveoy.top/t/topic/oe1q 著作权归作者所有。请勿转载和采集!

# 免费AI点我,无需注册和登录