import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Load the listings spreadsheet; the relative path is resolved against the
# current working directory.
data = pd.read_excel('jiemo.xls')
df = pd.DataFrame(data)

# Drop rows with missing values. .copy() makes df1 an independent frame so the
# column assignments below never write into a view of df
# (avoids pandas' SettingWithCopyWarning).
df1 = df.dropna().copy()

# The area column ('平方') is stored as text with a unit suffix. Keep only the
# leading integer part so it can be cast with astype(int).
# NOTE(review): the exact unit text used in the spreadsheet is not visible in
# this file, so the digits are extracted instead of replacing a hard-coded
# suffix -- confirm against the data.
df1['平方'] = df1['平方'].astype(str).str.extract(r'^\s*(\d+)', expand=False)

# Price per unit of area.
df1['价格/平米'] = df1['价格'] / df1['平方'].astype(int)

# Integer-encode the address column ('地址') so it can be placed on a numeric
# plot axis.
d1 = df1['地址']
category = pd.Categorical(d1)
le = LabelEncoder()
le.fit(category)
d1 = le.transform(d1)

price = df1['价格/平米']
Adress = d1

# Scatter of unit price against encoded address; points far outside the main
# range can be used to spot and filter out dirty data.
plt.scatter(price, Adress)
plt.xlabel("price")
plt.ylabel("Adress")
plt.show()
from sklearn.preprocessing import LabelEncoder

# Categorical columns to integer-encode, in feature order:
# '地址' (address), '朝向' (orientation), '类型' (type).
CATEGORICAL_COLUMNS = ('地址', '朝向', '类型')

# Use one LabelEncoder per column instead of refitting a single shared one, so
# every label <-> code mapping stays available afterwards (e.g. for
# inverse_transform or for encoding the labels of a new query row).
encoders = {}
encoded = {}
for column in CATEGORICAL_COLUMNS:
    category = pd.Categorical(df1[column])
    le = LabelEncoder()
    le.fit(category)
    encoders[column] = le
    encoded[column] = le.transform(df1[column])
    print(encoded[column])

# Keep the original per-column names; `le` is left bound to the encoder of the
# last column ('类型'), as before.
d1, d2, d3 = (encoded[column] for column in CATEGORICAL_COLUMNS)

# Target: the price column cast to int, one entry per row.
target = list(df1['价格'].astype(int))

# Features: one (address code, area as int, orientation code, type code) tuple
# per row. `feature` is a one-shot zip iterator and is exhausted by list().
feature = zip(d1, df1['平方'].astype(int), d2, d3)
t2 = list(feature)
[186 163 187 ... 235 231 231] [31 33 31 ... 33 31 33] [17 17 5 ... 17 2 39]
from sklearn.neighbors import KNeighborsClassifier

# Nearest-neighbour classifier using a single neighbour, fitted on the feature
# tuples in t2 with the integer prices in target as class labels.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(t2, target)

# NOTE: the score is computed on the very rows the model was fitted on, so it
# is a training-set figure and says nothing about unseen listings.
# The result is bound and printed because a bare expression is only echoed in
# a notebook/REPL and is silently discarded when run as a script.
train_score = knn.score(t2, target)
print(train_score)
# Recorded result from the original run: 0.8762793914246196

# Query row uses the same column order as the tuples in t2:
# [address code, area, orientation code, type code].
prediction = knn.predict([[167, 80, 2, 2]])
print(prediction)
Dataset download:
Link: https://pan.baidu.com/s/1vxt87sgEHFYjCmuyUPPVHg
Extraction code: eo90
Copy the text above and open the Baidu Netdisk mobile app for more convenient access.