Examples of "Human Motion State Information Rating" (KNN, Decision Tree, Bayesian)

The dataset contains exercise data from five people: A, B, C, D and E. Each person's data has 41 columns (one per feature) and many rows. The features record readings such as the person's body temperature, acceleration, and the magnetic field of the surrounding environment at a given moment. Based on these feature values, the person's motion state at that moment (running, cycling, or squatting) can be predicted.
The feature files of these five people are listed in featurePaths: featurePaths = ['A/A.feature','B/B.feature','C/C.feature','D/D.feature','E/E.feature']
The motion-state labels of these five people are listed in labelPaths: labelPaths = ['A/A.label','B/B.label','C/C.label','D/D.label','E/E.label']
Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here

#监督学习--分类算法---KNN——决策树——朴素贝叶斯

import numpy as np
import pandas as pd

# Evaluation report for predictions.
from sklearn.metrics import classification_report
# Helper that splits data into training and test sets.
from sklearn.model_selection import train_test_split
# The three classifiers compared in this example.
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

try:
    # Imputer was removed from sklearn.preprocessing in scikit-learn 0.22;
    # guard the import so the script still starts on newer releases.
    from sklearn.preprocessing import Imputer
except ImportError:
    Imputer = None



def load_datasets(feature_paths, label_paths):
    """Read feature and label files and merge each group into one array.

    Each feature file is parsed as header-less, comma-separated text in
    which ``?`` marks a missing value. Missing values are replaced by the
    mean of their column, computed separately for each file. A column with
    no observed value in a file has no mean and is left as NaN.

    Each label file is parsed as header-less text with ``pd.read_table``.

    Args:
        feature_paths: Iterable of feature-file paths.
        label_paths: Iterable of label-file paths.

    Returns:
        A ``(feature, label)`` tuple. ``feature`` is a 2-D float array with
        the rows of all feature files stacked in the given order (shape
        ``(0, 41)`` when ``feature_paths`` is empty). ``label`` is a 1-D
        array holding the flattened contents of all label files.
    """
    feature_chunks = []
    for file in feature_paths:
        df = pd.read_csv(file, sep=',', na_values='?', header=None)
        # Column-mean imputation is done with pandas so this function does
        # not depend on sklearn's Imputer class, which was removed in
        # scikit-learn 0.22.
        df = df.fillna(df.mean())
        feature_chunks.append(np.asarray(df, dtype=float))

    if feature_chunks:
        # Concatenate once instead of re-copying the growing array per file.
        feature = np.concatenate(feature_chunks)
    else:
        # Keep the historical empty result: zero rows, 41 feature columns.
        feature = np.empty((0, 41))

    # The empty float (0, 1) seed keeps the dtype promotion of the original
    # implementation, so integer labels are still returned as floats.
    label_chunks = [np.empty((0, 1))]
    for file in label_paths:
        label_chunks.append(pd.read_table(file, header=None).values)

    # Flatten the stacked (n, 1) label column into a 1-D vector.
    label = np.ravel(np.concatenate(label_chunks))
    return feature, label


if __name__ == '__main__':
    # Script entry point: train three classifiers on the data of people A-D
    # and report how well each one predicts the motion state of person E.
    featurePaths = ['A/A.feature', 'B/B.feature', 'C/C.feature', 'D/D.feature', 'E/E.feature']
    labelPaths = ['A/A.label', 'B/B.label', 'C/C.label', 'D/D.label', 'E/E.label']

    # The first four data sets form the training set, the last one the test set.
    x_train, y_train = load_datasets(featurePaths[:4], labelPaths[:4])
    x_test, y_test = load_datasets(featurePaths[4:], labelPaths[4:])

    # Shuffle the full training set. The original did this through
    # train_test_split(..., test_size=0.0), which recent scikit-learn
    # releases reject with a ValueError, so permute the rows directly.
    order = np.random.permutation(len(x_train))
    x_train, y_train = x_train[order], y_train[order]

    # Display name -> classifier; the names appear in the printed messages.
    classifiers = [
        ('knn', KNeighborsClassifier()),
        ('DT', DecisionTreeClassifier()),
        ('Bayes', GaussianNB()),
    ]

    # Train every classifier and predict on the test set.
    answers = []
    for name, clf in classifiers:
        print('Start training ' + name)
        clf.fit(x_train, y_train)
        print('Training done')
        answers.append((name, clf.predict(x_test)))
        print('Prediction done')

    # Report precision, recall, f1-score and support for each classifier.
    for name, answer in answers:
        print('\n\nThe classification report for ' + name + ':')
        print(classification_report(y_test, answer))

Insert picture description here
Insert picture description here
Insert picture description here
Insert picture description here

Guess you like

Origin blog.csdn.net/weixin_45014721/article/details/114653227