from multiprocessing import cpu_count
from joblib import Parallel, delayed
import pandas as pd
# Worker count handed to joblib: the CPU count reported by multiprocessing.cpu_count().
cores = cpu_count()
"""
对当前列进行处理的函数
"""
def proc(x=None):
    """Process one value taken from the source column.

    This is a template hook: replace the placeholder body with the real
    per-value computation. The original definition took no arguments even
    though it is called with one value per row, and returned a name that was
    never assigned, so any call raised an exception.

    Args:
        x: A single cell value from the column being processed.

    Returns:
        The computed value for the new column. The placeholder returns ``x``
        unchanged.
    """
    # TODO: replace this pass-through with the actual computation.
    results = x
    return results
def apply_parallel(df_grouped, func):
    """Run ``func`` on every group via joblib and concatenate the results.

    Args:
        df_grouped: An iterable yielding ``(key, group)`` pairs, such as the
            object returned by ``DataFrame.groupby``.
        func: Callable invoked once per group; each return value must be
            something ``pd.concat`` accepts.

    Returns:
        The result of ``pd.concat`` over the per-group outputs.
    """
    job = delayed(func)
    # The group key is not needed; only the group itself is sent to a worker.
    tasks = (job(chunk) for _, chunk in df_grouped)
    pieces = Parallel(n_jobs=cores)(tasks)
    return pd.concat(pieces)
def func(df):
    """Add a ``"new"`` column derived from the ``"old"`` column.

    ``proc`` is applied to every value of ``df["old"]`` and the outcome is
    stored in ``df["new"]``. The frame is modified in place and also returned.

    Args:
        df: DataFrame containing an ``"old"`` column.

    Returns:
        The same ``df`` object, now carrying the ``"new"`` column.
    """
    source = df["old"]
    df["new"] = source.apply(proc)
    return df
df_grouped = df.groupby("old")
df_new = apply_parallel(df_grouped, func)
Python & pandas | Processing a DataFrame with multiple processes to compute a new column from an existing column
Guess you like
Origin blog.csdn.net/weixin_43236007/article/details/107537523
Recommended
Ranking