sklearn实战之构建SVM多分类器

利用 sklearn 库构建 SVM 多分类器十分简单：该库已经封装好了相关算法，只需调用相应的函数即可。
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 23 18:44:37 2018

@author: 13260
"""

import os
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import svm, metrics, preprocessing
from sklearn.metrics import roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.externals import joblib
from scipy import interp

# 加载图像特征及标签
'''
def read_features(filedir):
    file_list = os.listdir(filedir)
    X = []
    tmp_y = os.listdir("F:/shiyan/TensorFlow/retrain/data/train")
    # print(len(y))
    y = []
    for file in file_list:
        
        tmp_file = filedir + "/" + file
        tmp = np.loadtxt(tmp_file,dtype=str)
        # np格式转换
        feature = tmp.astype(np.float)
        X.append(feature)
        old_filename = file[:-3].split("_")
        filename = "_".join(old_filename[:-1])
        # tmp_filename = filter(str.isalpha,file[:-3])
        
        # print(filename)
        y.append(tmp_y.index(filename))
    # 特征数据保存到txt文件的格式是str,因此在进行运算时应进行格式转换    
    
    tmp = "F:/python/airplane_001.txt"
    tmp_data = np.loadtxt(tmp,dtype=str)
    res = tmp_data.astype(np.float)
    X.append(res)
'''
# 加载特征和标签文件
def load_features_and_labels(features_path,labels_path):
    """Load the feature array and the label array from two ``.npy`` files.

    Args:
        features_path: Path handed to ``np.load`` for the features.
        labels_path: Path handed to ``np.load`` for the labels.

    Returns:
        A ``(features, labels)`` tuple with the two loaded objects, in that
        order.
    """
    # Features are read first, labels second.
    loaded = tuple(np.load(path) for path in (features_path, labels_path))
    print("[INFO] Feature and label file have been loaded !")
    return loaded

def train_and_test_model(feature,label,
                         label_dir="F:/shiyan/TensorFlow/retrain/data/train",
                         model_path="F:/python/model/conv_19_80%.pkl"):
    """Train a one-vs-rest linear SVM, save it, and print test-set metrics.

    The labels are binarized with ``LabelBinarizer``, the data is split
    80/20 with a fixed seed, a ``OneVsRestClassifier`` wrapping a linear
    ``SVC`` is fitted on the training part and written to ``model_path``
    with joblib, and several metrics for the held-out part are printed.

    Args:
        feature: Feature matrix passed to ``train_test_split`` and ``fit``.
        label: Class labels passed to ``LabelBinarizer().fit_transform``.
        label_dir: Directory whose entry names are used as ``target_names``
            in the classification report. Defaults to the path that was
            previously hard-coded.
        model_path: File the fitted model is dumped to. Defaults to the path
            that was previously hard-coded.

    Returns:
        None. All results are printed; the fitted model is saved to disk.
    """
    # Entry names of label_dir become the class names shown in the report.
    # NOTE(review): os.listdir returns entries in arbitrary order, so this
    # presumably relies on the labels having been numbered from the same
    # listing -- confirm against the code that produced the label file.
    class_names = os.listdir(label_dir)

    # Turn the class labels into an indicator matrix.
    label = LabelBinarizer().fit_transform(label)

    # Shuffle and split into training and test sets (fixed seed, 20% test).
    X_train, X_test, y_train, y_test = train_test_split(
        feature, label, test_size=.2, random_state=0)

    # One binary linear SVC per indicator column; seeded for reproducibility.
    model = OneVsRestClassifier(
        svm.SVC(kernel='linear', probability=True,
                random_state=np.random.RandomState(0)))
    print("[INFO] Successfully initialize a new model !")
    print("[INFO] Training the model…… ")
    clt = model.fit(X_train, y_train)
    print("[INFO] Model training completed !")

    # Persist the fitted model so that later runs can joblib.load() it
    # instead of retraining.
    # NOTE(review): joblib is imported from sklearn.externals at the top of
    # the file; newer scikit-learn releases no longer ship that module --
    # confirm the installed version.
    joblib.dump(clt, model_path)
    print("[INFO] Model has been saved !")

    y_test_pred = clt.predict(X_test)

    # scikit-learn metrics take (y_true, y_pred) in that order.
    ov_acc = metrics.accuracy_score(y_test, y_test_pred)
    print("overall accuracy: %f"%(ov_acc))
    print("===========================================")

    # NOTE: the values printed as "acc_for_each_class" / "average accuracy"
    # are per-class precision scores and their unweighted mean.
    per_class_precision = metrics.precision_score(y_test, y_test_pred,
                                                  average=None)
    print("acc_for_each_class:\n",per_class_precision)
    print("===========================================")
    mean_precision = np.mean(per_class_precision)
    print("average accuracy:%f"%(mean_precision))
    print("===========================================")

    report = classification_report(y_test, y_test_pred,
                                   target_names=class_names)
    print("classification report: \n",report)
    print("===========================================")

    # confusion_matrix needs one class index per sample, so the indicator
    # rows are collapsed with argmax.
    # NOTE(review): a row with no positive column collapses to index 0, so
    # samples for which no class was predicted are counted as class 0 here.
    conf_mat = metrics.confusion_matrix(y_test.argmax(axis=1),
                                        y_test_pred.argmax(axis=1))
    print("confusion metrix:\n",conf_mat)
    print("===========================================")
    print("[INFO] Successfully get SVM's classification overall accuracy ! ")


if __name__ == "__main__":
    # Script entry point: load the saved arrays, then train and evaluate.
    # The features come from the DenseNet directory and the labels from the
    # VGG19 directory.
    # NOTE(review): presumably both files list the samples in the same
    # order -- confirm before trusting the metrics.
    features_path, labels_path = (
        "F:/python/features/DenseNet/densenet_fv_flatten.npy",
        "F:/python/features/VGG19/VGG19_labels.npy",
    )
    feature, label = load_features_and_labels(features_path, labels_path)
    train_and_test_model(feature, label)
Cassiel.Lee
发布了50 篇原创文章 · 获赞 5 · 访问量 2万+
私信关注
sklearn实战之构建SVM多分类器

猜你喜欢