Rental data analysis with the KNN (k-nearest neighbours) algorithm


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the rental listings spreadsheet from the current working directory.
data = pd.read_excel('jiemo.xls')
df = pd.DataFrame(data)

# Drop rows with missing values. The explicit copy makes df1 an independent
# frame, so the column assignments below do not act on a view of df.
df1 = df.dropna().copy()

# '平方' (floor area) is stored as text with a unit suffix. Keep only the
# leading digits so that the later .astype(int) calls succeed.
# NOTE(review): the original stripped a literal unit string whose exact text
# was lost in translation; digit extraction avoids guessing it.
df1['平方'] = df1['平方'].str.extract(r'(\d+)', expand=False)

# Price per square metre.
# NOTE(review): the derived column name is reconstructed from a
# machine-translated listing -- confirm before relying on it elsewhere.
df1['价格/平米'] = df1['价格'] / df1['平方'].astype(int)

 

from sklearn.preprocessing import LabelEncoder

# Encode the address column ('地址') as integer codes so it can be used as
# a plot axis.
d1 = df1['地址']
category = pd.Categorical(d1)
le = LabelEncoder()
le.fit(category)
d1 = le.transform(d1)

# Price per square metre, computed from the base columns so this cell does
# not depend on the name of any derived column.
price = df1['价格'] / df1['平方'].astype(int)
address = d1
plt.scatter(price, address)
# Axis limits (plt.xlim / plt.ylim) could be set here to cut off part of
# the dirty data.

# Set the x- and y-axis labels.
plt.xlabel("price")
plt.ylabel("address")
plt.show()

 

from sklearn.preprocessing import LabelEncoder

# One encoder per categorical column, so every string -> integer mapping is
# still available after this cell. Refitting a single encoder three times
# would leave only the mapping of the last column.
address_encoder = LabelEncoder()
orientation_encoder = LabelEncoder()
type_encoder = LabelEncoder()

# '地址' = address, '朝向' = orientation, '类型' = listing type.
d1 = address_encoder.fit_transform(df1['地址'])
print(d1)
d2 = orientation_encoder.fit_transform(df1['朝向'])
print(d2)
d3 = type_encoder.fit_transform(df1['类型'])
print(d3)

# Names the previous version of this cell left behind; kept so that any
# later code using them keeps working.
category = pd.Categorical(df1['类型'])
le = type_encoder

# Feature rows are (address code, floor area, orientation code, type code);
# the target is the integer price ('价格').
feature = zip(d1, df1['平方'].astype(int), d2, d3)
target = list(df1['价格'].astype(int))
t2 = list(feature)
# Output of the three print() calls above:
# [186 163 187 ... 235 231 231]
# [31 33 31 ... 33 31 33]
# [17 17  5 ... 17  2 39]
 
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# 1-nearest-neighbour classifier fitted on every row; each distinct integer
# price in `target` acts as a class label.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(t2, target)

# Accuracy measured on the same rows the model was fitted on. With
# n_neighbors=1 this is an optimistic figure, not an estimate of how well
# unseen listings are predicted.
# The original run reported 0.8762793914246196 here.
print(knn.score(t2, target))

# Hold-out estimate: fit a second model on 80% of the rows and score it on
# the remaining 20%. random_state fixes the split so the figure is
# reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    t2, target, test_size=0.2, random_state=0
)
holdout_knn = KNeighborsClassifier(n_neighbors=1)
holdout_knn.fit(train_x, train_y)
print(holdout_knn.score(test_x, test_y))

# Query values follow the feature order used to build t2:
# (address code, floor area, orientation code, type code).
print(knn.predict([[167, 80, 2, 2]]))

Dataset download:

Link: https://pan.baidu.com/s/1vxt87sgEHFYjCmuyUPPVHg
Extraction code: eo90
Tip: copy this text and open the Baidu Netdisk mobile app for easier access.

Guess you like

Origin www.cnblogs.com/pythonz/p/11111648.html