翻译一下# resource_referencedatasetodsdf_orderscsvimport osfrom datetime import datetimeif __main__ == __name__ input_file_name = datasetodsdf_orderscsv output_file_name = $bdpsystemsavingModelPath
# @resource_reference{"/dataset/ods/df_orders.csv"}
# NOTE(review): the line above looks like a resource directive for the hosting
# platform; the leading "#" was restored so the file parses as Python --
# confirm the exact marker syntax the platform expects.
"""Copy the orders CSV line by line to a new file, logging progress."""
import os
from datetime import datetime


def _timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def copy_lines(input_path, output_path, progress_every=100000):
    """Copy a text file line by line and return the number of data rows.

    The first line (the column header) is copied but not counted. A progress
    line is printed every ``progress_every`` rows and once more when the copy
    finishes.

    Args:
        input_path: Path of the file to read.
        output_path: Path of the file to create or overwrite.
        progress_every: Print progress after this many rows; a falsy value
            disables the periodic progress lines.

    Returns:
        The number of lines copied after the header line.

    Raises:
        OSError: If either file cannot be opened.
    """
    # Open the input first so that a missing input file does not leave an
    # empty output file behind.
    with open(input_path, "r") as input_file, \
            open(output_path, "w") as output_file:
        # Write the first line (the column names).
        output_file.write(input_file.readline())

        # Read the input line by line and write each line to the output.
        row_count = 0
        line = input_file.readline()
        while line:
            row_count += 1
            output_file.write(line)
            line = input_file.readline()
            # Report progress every `progress_every` rows. `line` already
            # holds the NEXT row at this point, matching the original log.
            if progress_every and row_count % progress_every == 0:
                print(_timestamp(), row_count, line, flush=True)
        # Final progress line; `line` is the empty string once the input
        # is exhausted.
        print(_timestamp(), row_count, line, flush=True)
    return row_count


if __name__ == "__main__":
    # Define the input and output file names.
    input_file_name = "dataset/ods/df_orders.csv"
    # NOTE(review): "${bdp.system.savingModelPath}" is presumably a placeholder
    # substituted by the hosting platform before the script runs; no path
    # separator is inserted before the file name -- confirm the substituted
    # value ends with one.
    output_file_name = "${bdp.system.savingModelPath}" + "df_orders_1g.csv"

    copy_lines(input_file_name, output_file_name)

    # Print the sizes of the input and output files (in MB). This runs after
    # both files are closed so the output size reflects all flushed data.
    print(os.path.getsize(input_file_name)/1024/1024, "M")
    print(os.path.getsize(output_file_name)/1024/1024, "M")
原文地址: http://www.cveoy.top/t/topic/fqcG 著作权归作者所有。请勿转载和采集!