版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_26566137/article/details/82893680
(1)代码1
def predict_shixin(self):
    """Predict per-company survival curves and save them as a wide table.

    Loads the pickled Cox model from a fixed local path, predicts the
    survival function for every row of ``self.shixin_cox_model_input_tb``
    on a 0.05-step grid over [0, 12) (the next 12 months, per the original
    author's note), reshapes the result to one row per company, and hands it
    to ``self._save`` together with the HDFS feature directory.

    The input table must provide the columns ``judge_cnt``,
    ``net_fr_judge_cnt`` and ``company_name``.

    Returns:
        None. The result is only passed to ``self._save``.
    """
    model = joblib.load(
        '/home/sc/PycharmProjects/sc/model-feature-engine/evaluate_model/shixin_cox_evaluate_model/shixin_cox_model.pkl')
    source_tb = self.shixin_cox_model_input_tb

    # A *list* of labels selects a two-column sub-frame. The previous bare
    # tuple was looked up as a single column key and raised KeyError.
    features = source_tb[['judge_cnt', 'net_fr_judge_cnt']]

    # Time grid: 0.05-step points over [0, 12).
    times = numpy.arange(0, 12, 0.05)
    ypred = model.predict_survival_function(features, times)

    # Per the original author's note, the columns of ``ypred`` are the index
    # labels of the feature frame, so map each index label to the company
    # name stored in the same row of the input table.
    # NOTE(review): assumes the input index is unique — confirm.
    name_by_index = source_tb["company_name"].to_dict()
    ypred.rename(columns=name_by_index, inplace=True)

    # Transpose so that each row is a company and each column a time point.
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    rst = pd.DataFrame(ypred.values.T, index=ypred.columns, columns=list(ypred.index))

    # Expose the company name as the first regular column, then replace the
    # index with consecutive integers starting at 0.
    rst.insert(0, 'company_name', rst.index)
    rst.index = range(0, len(rst))

    # Persist the table (the target path is an HDFS directory).
    self._save(rst, '/hdfs/riskModelAuto/batch_1/v2_1/feature/', header=True, delimiter=',',
               mode="overwrite", quote='"')
(2)代码2
# ################ Model test ################
# Loads the saved Cox model and a CSV of modelling data, predicts survival
# curves for the first 100 rows, and reshapes the prediction into one row per
# company, printing intermediate results along the way.
save_model_path = os.path.join(prj_path, "shixin_cox_model.pkl")
df = pd.read_csv('/home/sc/Downloads/tmp/shixin_cox_all_data_to_model_new.csv')
clf = joblib.load(save_model_path)

c = ['shixin_label', 'survival_time', 'network_fr_judgedoc_cnt', 'judgedoc_cnt']
# Keep the first 100 rows, order them by survival time, and fill missing
# values with 0.
df1 = df[c]
df1 = df1[0:100]
df1 = df1.sort_values('survival_time', ascending=True)
df1 = df1.fillna(0)

X = df1[['network_fr_judgedoc_cnt', 'judgedoc_cnt']]
# Time grid: 0.05-step points over [0, 12) (240 points for the 100 companies,
# per the original author's notes).
times = numpy.arange(0, 12, 0.05)
ypred = clf.predict_survival_function(X, times)

# Output recorded by the original author for ypred / type(ypred) / len(ypred):
#   [112 rows x 100 columns]
#   <class 'pandas.core.frame.DataFrame'>
#   112
# NOTE(review): 112 rows does not match the 240-point grid above; presumably
# this was captured from a run that used a different ``times`` — confirm.

# Single-column slices (2nd and 20th column of the prediction); only the
# optional plotting snippet at the end of this script uses them.
rst1 = ypred.iloc[:, 1:2]
rst2 = ypred.iloc[:, 19:20]

print(ypred.columns)
print(ypred.index)

# Map every row label of X to the company name found at the same label in the
# full table (df1 does not carry the company_name column).
dict1 = df.loc[X.index, "company_name"].to_dict()
ypred.rename(columns=dict1, inplace=True)
print(ypred.columns)
print(ypred['一亿贵金属如皋有限公司'])
print(ypred.iloc[0:3, :])

# Transpose so that each row is a company and each column a time point.
# ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
kk = ypred.values.T
index = list(dict1.values())
print(index)
columns = list(ypred.index)
print(columns)
bb = pd.DataFrame(kk, index=ypred.columns, columns=list(ypred.index))

# Put the company name in the first column and switch to a 0..n-1 index.
bb.insert(0, 'company_name', bb.index)
print(len(bb))
bb.index = range(0, len(bb))
print(bb)

# Optional plot of the two selected curves (plot accepts lists or Series):
# x1 = list(rst1.index)
# x2 = list(rst2.index)
# plt.xlim(1.5, 3)
# plt.ylim(0.8, 1)
# plt.plot(x1, rst1, 'r', x2, rst2, 'g')
# plt.show()