多进程版本
import click
import glob
import os
import numpy as np
import multiprocessing
import time
def run_process(index, from_path, to_path, queue_lock, writer_lock, files, results):
    """Worker loop: take files off the shared queue until it is empty.

    Args:
        index: Slot in ``results`` where this worker stores its file count.
        from_path: Source directory, available to the per-file processing step.
        to_path: Output directory, available to the per-file processing step.
        queue_lock: Lock guarding access to ``files``.
        writer_lock: Lock intended to serialize log writes (unused until the
            optional logging step below is enabled).
        files: Shared list used as the work queue; entries are removed with
            ``pop()``.
        results: Shared indexable container; ``results[index]`` is set to the
            number of files this worker took from the queue.
    """
    process_name = multiprocessing.current_process().name
    # Number of files this worker has taken from the queue.
    count = 0
    while True:
        # The emptiness check and the pop must happen under the same lock,
        # otherwise two workers could both see one remaining item.
        with queue_lock:
            if not files:
                break
            file = files.pop()
        count += 1
        print("Process %s get file %s" % (process_name, file))
        try:
            # Per-file processing goes here.
            # TODO
            pass
        except Exception as e:
            # Report the failing file and keep going with the next one, so a
            # single bad file does not stop the whole worker.
            print(" [ERROR] Process %s %s at file %s" % (process_name, e, file))
        # Optional per-file log (needs a writer object made available here):
        # with writer_lock:
        #     txt_writer.write(file + " " + "XXX" + "\n")
    # Report how many files this worker handled.
    print("Process %s process file %d" % (process_name, count))
    results[index] = count
@click.command()
@click.option('--from_path',
              default='',
              type=click.STRING,
              help='The path of files to process.')
@click.option('--to_path',
              default='',
              type=click.STRING,
              help='The path to save the processed files.')
@click.option('--suffix',
              default='mp4',
              type=click.STRING,
              help='The suffix of the files.')
@click.option('--process_num',
              default=2,
              type=click.INT,
              help='The number of processes.')
def main(from_path, to_path, suffix, process_num):
    """Process every file under ``from_path`` ending in ``suffix`` with worker processes.

    The parameters must match the declared click options one-to-one: click
    passes each option as a keyword argument, so an extra required parameter
    without an option makes the command fail at call time.

    Args:
        from_path: Directory searched recursively for input files.
        to_path: Output directory; created if it does not exist.
        suffix: Only paths ending with this string are processed.
        process_num: Number of worker processes to start.
    """
    from_path = os.path.normpath(from_path)
    to_path = os.path.normpath(to_path)
    os.makedirs(to_path, exist_ok=True)

    # Collect the names of all files to process.
    candidates = glob.glob(from_path + '/**', recursive=True)
    files = [os.path.normpath(file) for file in candidates if file.endswith(suffix)]
    total_file_count = len(files)

    server = multiprocessing.Manager()
    queue_lock = server.Lock()
    writer_lock = server.Lock()
    # Per-worker file counts. Start at zero so a worker that dies before
    # reporting contributes nothing to the total.
    results = server.Array('i', [0] * process_num)
    files = server.list(files)  # shared work queue
    # txt_writer = open(os.path.join(to_path, "Info.txt"), 'w')  # optional processing log

    print("Beginning the multi processes. Now in process %s." % multiprocessing.current_process().name)

    start_time = time.time()
    # Create the worker processes.
    processes = [
        multiprocessing.Process(
            target=run_process,
            args=(i, from_path, to_path, queue_lock, writer_lock, files, results),
            name='LoopProcess_%d' % i,
        )
        for i in range(process_num)
    ]
    # Start all workers, then wait for all of them to finish.
    for worker in processes:
        worker.start()
    for worker in processes:
        worker.join()
    end_time = time.time()
    # txt_writer.close()

    # Read the shared array element by element through the manager proxy.
    total_process_count = sum(results[i] for i in range(process_num))
    print('End the multi processes. Now in process %s .' % multiprocessing.current_process().name)
    print("Total process count is: ", total_process_count)
    print("Total file count is: ", total_file_count)
    print("Total cost time is: ", end_time - start_time)
# Run the CLI only when this file is executed as a script, not when it is
# imported (child processes started by multiprocessing may import this module).
if __name__ == "__main__":
    main()
只需要完善 TODO 处的具体处理内容即可。
启动程序命令:
python main.py --from_path=XXX --to_path=XXX --suffix=jpg --process_num=8
多线程版本
import click
import glob
import os
import numpy as np
import threading
import time
def run_thread(index, from_path, to_path, txt_writer):
    """Worker loop: take files off the module-level queue until it is empty.

    Reads the module-level globals ``files`` (work queue), ``queue_lock``
    (lock guarding the queue) and ``results`` (per-thread counts), which the
    caller must set up before starting the thread.

    Args:
        index: Slot in ``results`` where this thread stores its file count.
        from_path: Source directory, available to the per-file processing step.
        to_path: Output directory, available to the per-file processing step.
        txt_writer: Writer for the optional per-file log (unused until the
            logging step below is enabled).
    """
    thread_name = threading.current_thread().name
    # Number of files this thread has taken from the queue.
    count = 0
    while True:
        # The emptiness check and the pop must happen under the same lock,
        # otherwise two threads could both see one remaining item.
        with queue_lock:
            if not files:
                break
            file = files.pop()
        count += 1
        print("Thread %s get file %s" % (thread_name, file))
        try:
            # Per-file processing goes here.
            # TODO
            pass
        except Exception as e:
            # Report the failing file and keep going with the next one, so a
            # single bad file does not stop the whole thread.
            print(" [ERROR] Thread %s %s at file %s" % (thread_name, e, file))
        # Optional per-file log:
        # with writer_lock:
        #     txt_writer.write(file + " " + "XXX" + "\n")
    # Report how many files this thread handled.
    print("Thread %s process file %d" % (thread_name, count))
    results[index] = count
@click.command()
@click.option('--from_path',
              default='',
              type=click.STRING,
              help='The path of files to process.')
@click.option('--to_path',
              default='',
              type=click.STRING,
              help='The path to save the processed files.')
@click.option('--suffix',
              default='mp4',
              type=click.STRING,
              help='The suffix of the files.')
@click.option('--thread_num',
              default=1,
              type=click.INT,
              help='The number of threading.')
def main(from_path, to_path, suffix, thread_num):
    """Process every file under ``from_path`` ending in ``suffix`` with worker threads.

    The work queue, the locks and the per-thread counts are published as
    module-level globals because ``run_thread`` reads them from there.

    Args:
        from_path: Directory searched recursively for input files.
        to_path: Output directory; created if it does not exist.
        suffix: Only paths ending with this string are processed.
        thread_num: Number of worker threads to start.
    """
    # ``global`` declarations must come before the first assignment to these
    # names in the function; declaring them afterwards is a SyntaxError.
    global files
    global queue_lock, writer_lock
    global results

    from_path = os.path.normpath(from_path)
    to_path = os.path.normpath(to_path)
    os.makedirs(to_path, exist_ok=True)

    # Collect the names of all files to process.
    candidates = glob.glob(from_path + '/**', recursive=True)
    files = [os.path.normpath(file) for file in candidates if file.endswith(suffix)]
    total_file_count = len(files)

    print("Beginning the multi threads. Now in thread %s." % threading.current_thread().name)
    queue_lock = threading.Lock()
    writer_lock = threading.Lock()
    # Per-thread file counts. Start at zero so a thread that dies before
    # reporting does not break the final sum.
    results = [0] * thread_num
    # Logging is disabled by default. To enable it, open the log file here
    # and close it after the threads have been joined:
    # txt_writer = open(os.path.join(to_path, "Info.txt"), 'w')
    txt_writer = None

    start_time = time.time()
    # Create the worker threads.
    threads = [
        threading.Thread(
            target=run_thread,
            args=(i, from_path, to_path, txt_writer),
            name='LoopThread_%d' % i,
        )
        for i in range(thread_num)
    ]
    # Start all workers, then wait for all of them to finish.
    for worker in threads:
        worker.start()
    for worker in threads:
        worker.join()
    end_time = time.time()
    # txt_writer.close()

    total_process_count = sum(results)
    print('End the multi threads. Now in thread %s .' % threading.current_thread().name)
    print("Total process count is: ", total_process_count)
    print("Total file count is: ", total_file_count)
    print("Total cost time is: ", end_time - start_time)
# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
只需要完善 TODO 处的具体处理内容即可。
启动程序命令:
python main.py --from_path=XXX --to_path=XXX --suffix=jpg --thread_num=8