# @resource_reference{"/dataset/ods/df_orders.csv"}

import os
from datetime import datetime

def _report_progress(rows_copied, last_line):
    """Print a timestamped progress line: time, rows copied, last row copied.

    The row is printed without its trailing line terminator so every
    progress message occupies exactly one console line.
    """
    print(
        datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        rows_copied,
        last_line.rstrip("\r\n"),
        flush=True,
    )


def copy_with_progress(input_file_name, output_file_name,
                       progress_every=100000, encoding="utf-8"):
    """Copy a text file line by line, printing progress along the way.

    The first line (the column names) is copied without being counted;
    every following line counts as one data row.

    Args:
        input_file_name: Path of the file to read.
        output_file_name: Path of the file to create or overwrite.
        progress_every: Print a progress message each time this many data
            rows have been copied. Must be at least 1.
        encoding: Text encoding used for both reading and writing.

    Returns:
        The number of data rows copied (header line excluded).

    Raises:
        ValueError: If ``progress_every`` is smaller than 1.
        OSError: If either file cannot be opened.
    """
    if progress_every < 1:
        raise ValueError("progress_every must be >= 1")

    rows_copied = 0
    last_line = ""
    # The input is opened first so that a missing input file does not leave
    # an empty (or truncated) output file behind.
    # newline="" turns off newline translation on both sides, so the line
    # endings of the input are written back unchanged.
    with open(input_file_name, "r", encoding=encoding, newline="") as input_file:
        with open(output_file_name, "w", encoding=encoding, newline="") as output_file:
            # Write the first line (column names) to the output file.
            output_file.write(input_file.readline())

            # Read the input line by line and write each line to the output.
            for last_line in input_file:
                output_file.write(last_line)
                rows_copied += 1

                # Print progress once every `progress_every` rows.
                if rows_copied % progress_every == 0:
                    _report_progress(rows_copied, last_line)

            # Final summary: total rows and the last row that was copied.
            _report_progress(rows_copied, last_line)
    return rows_copied


def main():
    """Copy the orders CSV to the output location and print both file sizes."""
    # Input and output file names.
    input_file_name = "dataset/ods/df_orders.csv"
    # NOTE(review): "${bdp.system.savingModelPath}" looks like a placeholder
    # that the hosting platform substitutes before the script runs - confirm.
    # It is kept verbatim, including the plain string concatenation.
    output_file_name = "${bdp.system.savingModelPath}" + "df_orders_1g.csv"

    copy_with_progress(input_file_name, output_file_name)

    # Print the sizes of the input and output files, in MB.
    print(os.path.getsize(input_file_name) / 1024 / 1024, "M")
    print(os.path.getsize(output_file_name) / 1024 / 1024, "M")


if __name__ == "__main__":
    main()
# 翻译一下: # @resource_reference{"/dataset/ods/df_orders.csv"} import os; from datetime import datetime; if "__main__" == __name__: input_file_name = "dataset/ods/df_orders.csv"; output_file_name = "${bdp.system.savingModelPath}" …

# 原文地址: http://www.cveoy.top/t/topic/fqcG 著作权归作者所有。请勿转载和采集!

# 免费AI点我,无需注册和登录