简单记录一下,免得下次找不到,还得重写。
目录
多线程
先看一下效果,然后直接上代码,并且每行都配注释。
# 导入线程池的包
from concurrent.futures import ThreadPoolExecutor, as_completed
# 整个的任务函数,方便调用。也可以拆开写
def extract_append_audio_features(extract_type='age', max_workers=16):
    """Process every entry of the external ``feature_csv`` task list with a
    thread pool, preserving the input order in the reassembled result.

    Args:
        extract_type: Feature category label (kept for interface
            compatibility; the threading logic itself does not use it).
        max_workers: Number of worker threads and data partitions.
    """
    # ``feature_csv`` is the task queue (a sequence) defined outside this
    # function — TODO confirm it is in scope at call time.
    datas_num = len(feature_csv)
    # tqdm did not render here, so progress is printed manually; the lock
    # guards the shared counter updated by several threads.
    compute_cnt_lock = threading.Lock()
    # Total number of items processed so far, across all threads.
    compute_cnt = 0

    def func(id, start_index, end_index):
        """Worker: process feature_csv[start_index:end_index], return (id, results)."""
        nonlocal compute_cnt
        feature_data = []
        for path in feature_csv[start_index:end_index]:
            t1 = time.time()
            # BUGFIX: the original called do_feature(y) with an undefined
            # name ``y``; the loop variable ``path`` is the intended input.
            data = do_feature(path)
            feature_data.append(data)
            # Take the lock, bump the counter, and report progress.
            with compute_cnt_lock:
                compute_cnt += 1
                print(f'\r完成进度[{id}=>{round(time.time()-t1, 3)}s]: {compute_cnt}/{datas_num}', end='')
        return id, feature_data

    def allocate_threads(thread_num=5):
        """Split the task list into ``thread_num`` contiguous slices, run them
        on the pool, and reassemble results in slice order."""
        # Each slice receives ``gap`` items (rounded up).
        gap = (datas_num // thread_num) + 1
        results = {}
        # Context-manage the pool so it is always shut down; the original
        # created it and never released it.
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            futures = []
            for id in range(thread_num):
                # Clamp the slice bounds so they never exceed the list length.
                start_index = min(id * gap, datas_num)
                end_index = min((id + 1) * gap, datas_num)
                # print(f'线程{id}分配内容: {start_index} - {end_index}')
                futures.append(pool.submit(func, id, start_index, end_index))
            print('>> 线程全部启动')
            # Collect per-slice results as the futures complete.
            for future in as_completed(futures):
                id, feature_data = future.result()
                results[id] = feature_data
        print('\n>> 线程全部完成')
        # Reassemble by slice id so output order matches input order.
        new_feature_data = []
        for i in range(len(results)):
            new_feature_data.extend(results[i])
        return new_feature_data

    new_feature_data = allocate_threads(max_workers)
    # Further processing (or a return) of ``new_feature_data`` goes here.
    # xxxx
    # xxxx
# Usage demo.
extract_append_audio_features(extract_type='age', max_workers=16)
多进程
与多线程差不多,只是涉及到了多进程通信,因此变量传输会有点不一样。此外也不能用函数的嵌套,所以把函数提了出来。
def extract_append_func(id, feature_csv, start_index, end_index, msg_queue, result_queue):
    """Per-process worker: handle feature_csv['filename'][start_index:end_index].

    One progress message per item goes to ``msg_queue``; on error the
    exception text is forwarded there instead. The final ``(id, results)``
    pair is always pushed onto ``result_queue`` and also returned.
    """
    feature_data = []
    try:
        slice_of_files = feature_csv['filename'][start_index:end_index]
        for path in slice_of_files:
            started = time.time()
            result = ''  # placeholder for the real computation result
            feature_data.append(result)
            elapsed = round(time.time() - started, 3)
            msg_queue.put({'code': 'msg', 'id': id, 'time': elapsed})
    except Exception as exc:
        # Report the failure to the listener rather than dying silently.
        msg_queue.put(str(exc))
    # Deliver whatever was accumulated, even after a partial failure.
    result_queue.put((id, feature_data))
    return id, feature_data
# 添加一个新的特征
def extract_append_audio_features(extract_type='age', max_workers=16):
    """Multiprocess variant: run ``extract_append_func`` over every row of
    train_output/<extract_type>/features.csv.

    Progress messages travel through a cross-process queue and are printed by
    a listener thread in the parent; per-worker results come back through a
    manager queue.

    Args:
        extract_type: Sub-directory under ``train_output`` whose
            features.csv is processed.
        max_workers: Number of worker processes / data partitions.
    """
    csv_path = f"train_output/{extract_type}/features.csv"
    feature_csv = pd.read_csv(csv_path)
    datas_num = len(feature_csv['filename'])
    # Cross-process queue for progress reporting (workers -> parent).
    msg_queue = Queue()
    # Manager queue so results survive the worker processes.
    manager = multiprocessing.Manager()
    result_queue = manager.Queue()
    current_index = 0
    # BUGFIX: the original toggled a boolean flag, but the listener thread
    # sat blocked in msg_queue.get() and could never observe it, so it hung
    # forever and its closing message never printed. A sentinel value ends
    # the loop deterministically.
    _STOP = None

    def print_msg_queue():
        """Listener: print progress dicts (or error strings) until _STOP arrives."""
        nonlocal current_index
        print('>> 启动queue监听中...')
        while True:
            entry = msg_queue.get()
            if entry is _STOP:
                break
            if type(entry) == dict and entry['code'] == 'msg':
                current_index += 1
                print(f"\r完成进度[{entry['id']}=>{entry['time']}s]: {current_index}/{datas_num}", end='', flush=True)
            else:
                # Anything else is an error string forwarded by a worker.
                print(entry)
        print('\n>> queue监听中结束')

    pt = threading.Thread(target=print_msg_queue, daemon=True)
    pt.start()

    # Each worker receives ``gap`` rows (rounded up), clamped to the table size.
    gap = (datas_num // max_workers) + 1
    workers = []
    results = {}
    for id in range(max_workers):
        start_index = min(id * gap, datas_num)
        end_index = min((id + 1) * gap, datas_num)
        # print(f'线程{id}分配内容: {start_index} - {end_index}')
        p = Process(target=extract_append_func,
                    args=(id, feature_csv, start_index, end_index, msg_queue, result_queue),
                    daemon=True)
        workers.append(p)
    print('>> 线程全部启动')
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    # Tell the listener to stop, and wait for it so its closing line prints.
    msg_queue.put(_STOP)
    pt.join()
    print('\n>> 线程全部完成')
    # Drain the result queue into a dict keyed by worker id.
    while not result_queue.empty():
        id, feature_data = result_queue.get()
        results[id] = feature_data
    print(results)
扫描二维码关注公众号,回复: 14629779 查看本文章