机器学习-建立事件预测器

import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.svm import SVC

try:
    # Current scikit-learn releases expose cross_val_score here.
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Fallback for old releases that only ship the legacy module.
    from sklearn.cross_validation import cross_val_score

input_file = 'building_event_binary.txt'

# Read the comma-separated records. Each stored row keeps field 0 and
# fields 2 onward; field 1 is not used.
X = []
with open(input_file, 'r') as f:
    for line in f:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate the final field when the file's
        # last line has no trailing newline.
        line = line.rstrip('\n')
        if not line:
            # Skip blank lines so every row has the same number of fields.
            continue
        data = line.split(',')
        X.append([data[0]] + data[2:])

X = np.array(X)

# Convert string columns to numeric codes.
# The first row decides each column's type: if its value is all digits the
# column is copied as-is, otherwise a new LabelEncoder is fitted on the whole
# column and appended to label_encoder (so encoders are in column order).
label_encoder = []
X_encoded = np.empty(X.shape)
for col, sample in enumerate(X[0]):
    column = X[:, col]
    if not sample.isdigit():
        encoder = preprocessing.LabelEncoder()
        label_encoder.append(encoder)
        column = encoder.fit_transform(column)
    X_encoded[:, col] = column

# The last encoded column is the target; everything before it is a feature.
y = X_encoded[:, -1].astype(int)
X = X_encoded[:, :-1].astype(int)

# Build the SVM model: RBF kernel, probability estimates enabled,
# balanced class weights, then fit it on the full data set.
params = dict(kernel='rbf', probability=True, class_weight='balanced')
classifier = SVC(**params)
classifier.fit(X, y)

# Cross-validation: 3-fold accuracy, reported as a percentage rounded to
# two decimals.
accuracy = cross_val_score(classifier, X, y, scoring='accuracy', cv=3)
mean_accuracy_pct = 100 * accuracy.mean()
print(round(mean_accuracy_pct, 2))

# Performance evaluation on the data the classifier was fitted on.
y_predict = classifier.predict(X)
# Score against the true labels y. Scoring against y_predict would compare
# the model's predictions with themselves and always report 1.0.
print(classifier.score(X, y))
print(classification_report(y, y_predict))

# Encode a single sample the same way the training columns were encoded.
input_data = ['Tuesday', '12:30:00', '21', '23']
input_data_encoded = [-1] * len(input_data)  # placeholder per feature
# Index of the next encoder to use: label_encoder holds one encoder per
# non-numeric column, in column order.
count = 0
for i, item in enumerate(input_data):
    if item.isdigit():
        input_data_encoded[i] = int(item)
    else:
        # LabelEncoder.transform expects a 1-D array-like, not a bare
        # string, so wrap the value in a list and take the single result.
        input_data_encoded[i] = int(label_encoder[count].transform([item])[0])
        count = count + 1
input_data_encoded = np.array(input_data_encoded)

# Predict the class of the encoded sample and print its original label.
# predict() expects a 2-D array of shape (n_samples, n_features), so the
# single sample is reshaped into one row.
output_class = classifier.predict(np.asarray(input_data_encoded).reshape(1, -1))
# Index the decoded labels, not the return value of print() (which is None).
print('Output class', label_encoder[-1].inverse_transform(output_class)[0])

猜你喜欢

转载自blog.csdn.net/u012967763/article/details/79233638