import pandas as pd
import random as rd
import numpy as np
import math as ma


def _sample_group(group, name, frac_by_group, random_state=None):
    """Return a random sample of one stratum at its configured fraction.

    Args:
        group: DataFrame holding the rows of a single stratum.
        name: Key of the stratum, used to look up its sampling fraction.
        frac_by_group: Mapping from stratum key to sampling fraction.
        random_state: Forwarded to ``DataFrame.sample``.

    Raises:
        KeyError: If ``name`` has no entry in ``frac_by_group``.
    """
    try:
        frac = frac_by_group[name]
    except KeyError:
        # Same exception type as a plain dict lookup, but with context.
        raise KeyError(
            "no sampling fraction configured for group {!r}".format(name)
        ) from None
    return group.sample(frac=frac, random_state=random_state)


def typeicalSampling(group, typeicalFracDict, random_state=None):
    """Sample one group produced by ``groupby(...).apply``.

    Args:
        group: Sub-DataFrame whose ``name`` attribute is the group key
            (as set by ``DataFrameGroupBy.apply``).
        typeicalFracDict: Mapping from group key to sampling fraction.
        random_state: Optional seed/generator forwarded to
            ``DataFrame.sample``.

    Returns:
        The sampled rows of ``group``.

    Raises:
        KeyError: If the group key is missing from ``typeicalFracDict``.
    """
    return _sample_group(group, group.name, typeicalFracDict, random_state)


def group_sample(data_set, lable, typeicalFracDict, random_state=None):
    """Draw a stratified random sample from ``data_set``.

    Args:
        data_set: The DataFrame to sample from.
        lable: Name of the column that defines the strata.
        typeicalFracDict: Mapping from each stratum value to the fraction
            of that stratum's rows to keep.
        random_state: Optional seed/generator forwarded to
            ``DataFrame.sample`` for every stratum; ``None`` (the default)
            keeps the sampling non-deterministic.

    Returns:
        A DataFrame with the sampled rows of every stratum, concatenated in
        the order ``groupby`` yields the groups. All original columns,
        including the stratification column, and the original index labels
        are kept.

    Raises:
        KeyError: If a stratum value is missing from ``typeicalFracDict``.
    """
    # Iterate over the groups explicitly instead of using groupby().apply():
    # apply() operating on the grouping column is deprecated in recent
    # pandas, and the stratification column must stay in the result.
    strata = [
        _sample_group(group, name, typeicalFracDict, random_state)
        for name, group in data_set.groupby(lable)
    ]
    if not strata:
        # pd.concat() rejects an empty list; return an empty frame that
        # still carries the input's columns.
        return data_set.iloc[0:0].copy()
    return pd.concat(strata)


data = pd.DataFrame({
    'id': [3566841, 6541227, 3512441, 3512441, 3512441,
           3512441, 3512441, 3512441, 3512441, 3512441],
    'sex': ['male', 'Female', 'Female', 'male', 'Female',
            'Female', 'male', 'Female', 'male', 'Female'],
    'level': ['high', 'low', 'middle', 'high', 'low',
              'middle', 'high', 'low', 'middle', 'middle'],
})

data_set = data
label = 'sex'
typicalFracDict = {
    'male': 0.8,
    'Female': 0.5,
}
result = group_sample(data_set, label, typicalFracDict)
print(result)
id sex level
7 3512441 Female low
1 6541227 Female low
2 3512441 Female middle
8 3512441 male middle
6 3512441 male high
0 3566841 male high
资源来自网上，自己稍微整理了一下。