[胡搞]某炼丹比赛几小时现学的 Python + ML 效果,时间:2018/8/14(第一次写 Python 很狼狈,只能说吃好喝好)

#from sklearn.cross_validation import train_test_split
#rtrain, rtest = train_test_split(rentDf, test_size = 0.3)
#strain, stest = train_test_split(soldDf, test_size = 0.3)
# Feature adjustments on the `strain` and `rtrain` frames.
# Every update is written as one `.loc[mask, column] = value` call. The
# chained form `frame[mask].column = value` assigns into a temporary copy
# produced by the boolean selection, so the original frame is never changed.

# Cap the bedroom count: every value above 3 is stored as 4.
strain.loc[strain.bedroom_cnt > 3, "bedroom_cnt"] = 4

# Blend the two area columns (2/3 build area + 1/3 inner area) for rtrain.
# NOTE(review): `insize_area` differs from the `inside_area` column used for
# strain on the next statement -- confirm the real column name in rtrain.
rtrain.loc[rtrain.build_area > 0, "build_area"] = (
    rtrain.build_area * 2 / 3 + rtrain.insize_area * 1 / 3
)

# Blend the two area columns with equal weight for strain.
strain.loc[strain.build_area > 0, "build_area"] = (
    strain.build_area * 1 / 2 + strain.inside_area * 1 / 2
)

# Replace strain.total_price by price / blended area plus a bedroom term.
# NOTE(review): the bedroom term reads from `rtrain` (a different frame),
# uses `bedroom` rather than `bedroom_cnt`, and divides by the whole `rtrain`
# frame; the expression looks truncated. It is kept as written -- confirm the
# intended formula before relying on this column.
strain.loc[strain.total_price > 0, "total_price"] = (
    strain.total_price / strain.build_area + rtrain.bedroom * 4 / rtrain
)

# Replace rtrain.total_price by a ratio plus half the bedroom count.
# NOTE(review): `total_price / total_price` is 1 for every selected row; the
# denominator was presumably meant to be `build_area` as in the strain
# statement above -- confirm.
rtrain.loc[rtrain.total_price > 0, "total_price"] = (
    rtrain.total_price / rtrain.total_price + rtrain.bedroom_cnt * 1 / 2
)



#rtest.to_csv('/home/qushanzu/桌面/jisuanzhidao/a.csv', sep=',', header=True, index=True)
#df = df.groupby(by=['column_A'])['column_B'].sum
# Overwrite the answer at every position whose reference total_price is
# negative with the fixed value 0.1.
# Iterating the column values (instead of `ref.index`, which yields row
# labels that cannot be subscripted with 'total_price') and pairing them with
# their position via enumerate replaces the hand-maintained counter.
for i, price in enumerate(ref['total_price']):
    if price < 0:
        ans[i] = 0.1

import numpy as np  # needed for np.mean below; not imported anywhere else in this script
from sklearn import tree

# Fit a decision tree that splits on the entropy criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
print(clf)
clf.fit(x_train, y_train)

# Write the fitted tree structure to a Graphviz .dot file.
with open("tree.dot", 'w') as f:
    # The exporter writes into the open handle; its return value is not
    # needed, so the handle name is no longer rebound to it.
    tree.export_graphviz(clf, out_file=f)

# The importances reflect each feature's influence: the larger the value,
# the bigger the role that feature plays in the classification.
print(clf.feature_importances_)

# Print the prediction results.
# NOTE: predictions are made on x_train and compared with y_train, so the
# final number is the accuracy on the training data, not on held-out data.
answer = clf.predict(x_train)
print(x_train)
print(answer)
print(y_train)
print(np.mean(answer == y_train))
    


#for i in range(1, rtest.)
    
   
#'''
发布了22 篇原创文章 · 获赞 6 · 访问量 5227

猜你喜欢

转载自blog.csdn.net/qq_38362788/article/details/81608401