为类别变量的每个不同取值赋予唯一的数字ID
sklearn.preprocessing.LabelEncoder 的使用示例
from sklearn import preprocessing
from sklearn.ensemble import GradientBoostingRegressor
# from sklearn import cross_validation, metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
import time
# Presumably the list of categorical columns to encode; it is not referenced
# again in the visible code, which works on the "model" column directly.
encoder = ["model"]
print("encoding...")

# Build a LabelEncoder and fit it on the "model" column in one step
# (fit() returns the encoder itself), then overwrite that column with the
# integer codes produced by the fitted encoder.
col_encoder = preprocessing.LabelEncoder().fit(train_sales_data["model"])
train_sales_data["model"] = col_encoder.transform(train_sales_data["model"])
编码前的原始数据如下:
编码后的数据如下: