import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn.svm import SVC

try:
    # scikit-learn >= 0.18 (the old module was removed in 0.20)
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# Load multivariate data from the input file
def load_data(input_file):
    """Read a comma-separated numeric file into features and labels.

    Every non-blank line is split on ',' and each field is converted to
    float. The last field of a line is its label; the preceding fields are
    its features.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the per-line feature
        lists and the per-line labels, in file order.

    Raises:
        ValueError: If a non-blank line contains a field that cannot be
            converted to float.
    """
    X = []
    y = []
    with open(input_file, 'r') as f:
        # Iterate the file object directly instead of loading every line
        # into memory first with readlines().
        for line in f:
            # Blank lines (typically a trailing newline at end of file)
            # would otherwise make float() raise ValueError.
            if not line.strip():
                continue
            data = [float(x) for x in line.split(',')]
            X.append(data[:-1])
            y.append(data[-1])
    return np.array(X), np.array(y)
# Plotting helper
def plot_classifier(classifier, X, y, title='Classifier boundaries', annotate=False, step_size=0.01):
    """Plot a classifier's predicted regions with the data points on top.

    A dense grid is laid over the range of the first two columns of ``X``
    (padded by 1.0 on every side), the classifier is asked to predict every
    grid point, and the predictions are drawn as a grayscale mesh. The
    points of ``X`` are then scattered over the mesh, coloured by ``y``.
    The figure is created but not shown; the caller decides when to call
    ``plt.show()``.

    Args:
        classifier: Object with a ``predict`` method; it is called with a
            two-column array of grid coordinates.
        X: 2-D NumPy array; only columns 0 and 1 are plotted.
        y: Per-point values used to colour the scatter plot.
        title: Title of the figure.
        annotate: If true, label every point with its rounded coordinates.
        step_size: Spacing of the mesh grid along both axes.
    """
    # Define the plotted range: the data extent padded by one unit per side.
    x_min, x_max = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    y_min, y_max = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0

    # Build the mesh grid and evaluate the classifier on every grid point.
    x_values, y_values = np.meshgrid(
        np.arange(x_min, x_max, step_size),
        np.arange(y_min, y_max, step_size),
    )
    mesh_output = classifier.predict(np.c_[x_values.ravel(), y_values.ravel()])
    # Back to the grid's 2-D shape so it can be drawn as a mesh.
    mesh_output = mesh_output.reshape(x_values.shape)

    plt.figure()
    plt.title(title)

    # Colour map options:
    # http://matplotlib.org/examples/color/colormaps_reference.html
    plt.pcolormesh(x_values, y_values, mesh_output, cmap=plt.cm.gray)

    # Overlay the data points on the predicted regions.
    plt.scatter(X[:, 0], X[:, 1], c=y, s=80, edgecolors='black', linewidth=1, cmap=plt.cm.Paired)

    # Clip the axes to the grid and hide the tick marks.
    plt.xlim(x_values.min(), x_values.max())
    plt.ylim(y_values.min(), y_values.max())
    plt.xticks(())
    plt.yticks(())

    if annotate:
        # Distinct loop names so the ``y`` parameter is not rebound.
        for x_coord, y_coord in zip(X[:, 0], X[:, 1]):
            # Full documentation of the function available here:
            # http://matplotlib.org/api/text_api.html#matplotlib.text.Annotation
            plt.annotate(
                '(' + str(round(x_coord, 1)) + ',' + str(round(y_coord, 1)) + ')',
                xy=(x_coord, y_coord), xytext=(-15, 15),
                textcoords='offset points',
                horizontalalignment='right',
                verticalalignment='bottom',
                bbox=dict(boxstyle='round,pad=0.6', fc='white', alpha=0.8),
                arrowprops=dict(arrowstyle='-', connectionstyle='arc3,rad=0'))
# Load the input data
input_file = 'data_multivar.txt'
X, y = load_data(input_file)

# Split the samples by class label (boolean masks instead of index loops)
class_0 = X[y == 0]
class_1 = X[y == 1]

# Optional: plot the raw data points
#plt.figure()
#plt.scatter(class_0[:,0], class_0[:,1], facecolor = 'black', edgecolors = 'black', marker = 's')
#plt.scatter(class_1[:,0], class_1[:,1], facecolor = 'None', edgecolors = 'black', marker = 's')
#plt.title('Input Data')

# Hold out 25% of the samples for testing; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)

# Kernel choices:
#param = {'kernel':'linear'}  # linear kernel
#param = {'kernel':'poly', 'degree':3}  # cubic polynomial kernel
param = {'kernel': 'rbf'}  # radial basis function: non-linear classifier
classifier = SVC(**param)
classifier.fit(X_train, y_train)

plot_classifier(classifier, X_train, y_train, 'Traning Dataset')
plt.show()

# np.unique returns the labels sorted, so every name lines up with the label
# it describes. Passing labels= explicitly keeps the report rows and
# target_names the same length even if the test split lacks a class.
class_labels = np.unique(y)
target_names = ['Class-' + str(int(label)) for label in class_labels]
y_test_pred = classifier.predict(X_test)
print(classification_report(y_test, y_test_pred, labels=class_labels, target_names=target_names))
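# Machine learning: SVM classifier
# Reposted from blog.csdn.net/u012967763/article/details/79217600
# (Web-page navigation residue: "You may also like", "Today's picks", "Weekly ranking")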