Python & pandas | Computing a new DataFrame column from an existing column using multiple processes

from multiprocessing import cpu_count
from joblib import Parallel, delayed
import pandas as pd

# Worker count handed to joblib as n_jobs: the CPU count reported by multiprocessing.
cores = cpu_count()
def proc(x):
	"""Compute the new-column value for one element of the column being processed.

	This is the template's placeholder for the per-value computation. It
	accepts the single element that ``Series.apply`` passes in, so the
	signature matches the ``proc(x)`` call made by ``func``.

	Args:
		x: One value taken from the source column.

	Returns:
		The value to store in the new column for ``x``.

	Raises:
		NotImplementedError: Always, until the placeholder body is replaced
			with the real computation.
	"""
	# TODO: replace with the real per-value logic and return its result.
	# Failing loudly here is clearer than the NameError the undefined
	# `results` name used to produce.
	raise NotImplementedError(
		"proc(x) is a placeholder; implement the per-value computation"
	)
	
def apply_parallel(df_grouped, func):
	"""Run ``func`` on every group through joblib and concatenate the outputs.

	Args:
		df_grouped: Iterable yielding ``(key, group)`` pairs, such as the
			result of ``DataFrame.groupby``.
		func: Callable invoked once per group; its return values are passed
			to ``pd.concat``.

	Returns:
		The result of ``pd.concat`` over the per-group outputs.
	"""
	# The group key is not needed; only the group itself is sent to func.
	tasks = (delayed(func)(chunk) for _, chunk in df_grouped)
	runner = Parallel(n_jobs=cores)
	pieces = runner(tasks)
	return pd.concat(pieces)

def func(df):
	"""Add a ``"new"`` column derived from ``"old"`` and return the frame.

	Each element of ``df["old"]`` is passed to ``proc``; the results are
	assigned to ``df["new"]`` on the frame that was passed in.

	Args:
		df: Frame (or group) containing an ``"old"`` column.

	Returns:
		The same ``df`` object, now carrying the ``"new"`` column.
	"""
	source_column = df["old"]
	computed = source_column.apply(lambda value: proc(value))
	df["new"] = computed
	return df

# Split the frame into one group per distinct value of "old"; each group is
# handed to apply_parallel as a separate unit of work.
# NOTE(review): `df` is not defined in this snippet — it must already exist.
# NOTE(review): pandas groupby drops rows whose key is NaN by default, so such
# rows would be missing from df_new — confirm that is intended.
df_grouped = df.groupby("old")
# Run func on every group via joblib and concatenate the per-group results.
df_new = apply_parallel(df_grouped, func)

Guess you like

Origin blog.csdn.net/weixin_43236007/article/details/107537523
Recommended