先将DataFrame转换成matrix并转置,再重新构造DataFrame并设置索引和列名

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_26566137/article/details/82893680

(1)代码1

    def predict_shixin(self):
        """Predict survival curves for every input company and save them.

        Loads the pickled model, evaluates ``predict_survival_function`` for
        each row of ``self.shixin_cox_model_input_tb`` on the grid
        ``numpy.arange(0, 12, 0.05)`` (the next 12 months according to the
        original author's note), reshapes the prediction to one row per
        company with ``company_name`` as the first column, and hands the
        table to ``self._save``.

        Returns:
            None. The result is only passed to ``self._save``.
        """
        clf = joblib.load(
            '/home/sc/PycharmProjects/sc/model-feature-engine/evaluate_model/shixin_cox_evaluate_model/shixin_cox_model.pkl')
        input_tb = self.shixin_cox_model_input_tb
        # Select the feature columns with a list. A bare
        # ``tb['a', 'b']`` is looked up as the single label ('a', 'b')
        # and raises KeyError on a frame with plain column names.
        input_x = input_tb[['judge_cnt', 'net_fr_judge_cnt']]
        # Time grid: 0 up to (not including) 12 in steps of 0.05.
        times = numpy.arange(0, 12, 0.05)
        ypred = clf.predict_survival_function(input_x, times)

        # Per the original author's note, ypred's columns carry the index
        # labels of input_x; map each label to its company name.
        # NOTE(review): assumes the input index is unique — confirm.
        name_by_label = input_tb['company_name'].to_dict()
        ypred = ypred.rename(columns=name_by_label)

        # Transpose so that each row is a company and each column a time
        # point. ``DataFrame.as_matrix`` was removed in pandas 1.0;
        # ``.values`` yields the same ndarray on old and new versions.
        rst = pd.DataFrame(ypred.values.T, index=ypred.columns,
                           columns=list(ypred.index))

        # Expose the company name as the first regular column ...
        rst.insert(0, 'company_name', rst.index)
        # ... and replace the index with plain 0..n-1 integers.
        rst.index = range(0, len(rst))

        # Persist to HDFS.
        self._save(rst, '/hdfs/riskModelAuto/batch_1/v2_1/feature/', header=True, delimiter=',',
                   mode="overwrite", quote='"')

(2)代码2

################# Manual check of the saved model
# Loads the pickled model and a sample CSV, predicts survival curves for the
# first 100 rows, and reshapes the prediction into one row per company.
save_model_path = os.path.join(prj_path, "shixin_cox_model.pkl")
df = pd.read_csv('/home/sc/Downloads/tmp/shixin_cox_all_data_to_model_new.csv')
clf = joblib.load(save_model_path)

c = ['shixin_label', 'survival_time', 'network_fr_judgedoc_cnt', 'judgedoc_cnt']
df1 = df[c]
# Take the first 100 rows, then order them by survival time.
df1 = df1[0:100]
df1 = df1.sort_values('survival_time', ascending=True)
df1 = df1.fillna(0)

X = df1[['network_fr_judgedoc_cnt', 'judgedoc_cnt']]
# Time grid: 0 up to (not including) 12 in steps of 0.05.
times = numpy.arange(0, 12, 0.05)

ypred = clf.predict_survival_function(X, times)
# Output recorded by the original author for print(ypred), type(ypred) and
# len(ypred):
#   [112 rows x 100 columns]
#   <class 'pandas.core.frame.DataFrame'>
#   112

# Single-column slices (positions 1 and 19) kept for ad-hoc inspection.
rst1 = ypred.iloc[:, 1:2]
rst2 = ypred.iloc[:, 19:20]

print(ypred.columns)
print(ypred.index)

# Map each row label of X to its company name in the full table.
# NOTE(review): assumes df has a unique index (read_csv default) — confirm.
dict1 = df.loc[X.index, "company_name"].to_dict()

# Relabel the prediction columns with company names.
ypred.rename(columns=dict1, inplace=True)
print(ypred.columns)
print(ypred['一亿贵金属如皋有限公司'])
print(ypred.iloc[0:3, :])

# Transpose via the underlying ndarray. ``DataFrame.as_matrix`` was removed
# in pandas 1.0; ``.values`` returns the same array on old and new versions.
kk = ypred.values.T
index = list(dict1.values())
print(index)
columns = list(ypred.index)
print(columns)

# Rebuild a frame with companies as rows and time points as columns.
bb = pd.DataFrame(kk, index=ypred.columns, columns=columns)
# Company name becomes the first regular column.
bb.insert(0, 'company_name', bb.index)

print(len(bb))
# Replace the index with plain 0..n-1 integers.
bb.index = range(0, len(bb))
print(bb)



猜你喜欢

转载自blog.csdn.net/sinat_26566137/article/details/82893680
今日推荐