# Split `df` into at most three chunks, run the video conversion script on
# each chunk's file, then merge all script outputs back into `df` once.
#
# Imported under a private alias so that no existing `pd`/`pandas` name in the
# enclosing scope is shadowed.
from pandas import concat as _concat_frames

length = len(input_df)
# An empty input would give a chunk size of 0 and a division by zero below.
chunk_size = max(1, math.ceil(length / 3))
# NOTE(review): grouping by `df.index // chunk_size` assumes `df` has a
# 0..n-1 integer index and the same number of rows as `input_df` -- confirm.
out_frames = []
for i, chunk in enumerate(df.groupby(df.index // chunk_size)):
    # groupby yields (group key, sub-frame) pairs; only the frame is needed.
    chunk_df = chunk[1]

    # Split the file: write this chunk as a tab-separated file for the script.
    filename = 'xsp_input_{}.csv'.format(i)
    chunk_df.to_csv(filename, index=False, sep='\t', encoding='utf8',
                    quoting=csv.QUOTE_NONE)
    # NOTE(review): the original passed the chunk index as the FIRST path
    # component (before the AFS data root). It is used here as the file-name
    # suffix instead; confirm the layout expected by downstream consumers.
    afs_addr = '%s/%s/xsp_input_%s.file' % (
        common.SAMPLE_LABEL_AFS_PATH_DATA, task_id, i)
    # Upload the chunk file itself (the original passed the DataFrame).
    # NOTE(review): assumes put_file_to_afs takes a local file path -- confirm.
    common.put_file_to_afs(filename, afs_addr)

    # Run the script logic: generate title and bosurl.
    # The script must receive the chunk's file name; the original formatted
    # the DataFrame object into the command line.
    # NOTE(review): `filename` is relative to this process's working directory
    # while the command first changes into common.VIDEO_DCG_TOOL; confirm the
    # script can resolve it and that it writes its result to `<input>.out`.
    cmd = "cd {}; bash run_qu_qv.sh {}".format(common.VIDEO_DCG_TOOL,
                                               filename)
    logging.info("转换 url 到 video: {}".format(cmd))
    common.execute_shell_cmd(cmd)

    # Process the script output for this chunk.
    out_filename = "{}.out".format(filename)
    out_df = read_input_data(out_filename)
    logging.info("视频脚本输出head为:{}".format(
        '\t'.join(out_df.columns.to_list())))
    afs_out_addr = '%s/%s/xsp_input_out_%s.file' % (
        common.SAMPLE_LABEL_AFS_PATH_DATA, task_id, i)
    # Upload the script OUTPUT (the original re-uploaded the input chunk).
    common.put_file_to_afs(out_filename, afs_out_addr)
    out_frames.append(out_df)

# Merge the function input with the script output, once for all chunks.
# Merging inside the loop would, from the second chunk on, push the script's
# columns into `*_DROP` (they already exist on the left side) and eventually
# produce duplicate column names.
if out_frames:
    merged_out_df = _concat_frames(out_frames, ignore_index=True)
    df = df.merge(merged_out_df, how="left", on="result_url",
                  suffixes=('', '_DROP'))