肌肉活动电信号推测手势
比赛地址:https://god.yanxishe.com/14
数据集采集:手臂皮肤放置8个传感器,每个传感器会得到8个肌肉电信号数据,一个手势共64个数据。训练集第65个数据为手势类别。
数据集共包含4种手势,选手需训练模型正确识别不同肌肉信号数据对应的手势。
0=摇滚,1=剪刀,2=布,3=OK
实验结果
SVM:0.90
MLP:0.88
(一)SVM
sklearn提供了SVM的包
SVM基本函数
from sklearn import svm #导入包
clf=svm.SVC()#创建SVC分类器
clf.fit(x,y)#装载数据训练
#在拟合后,这个模型可以用来预测新的分类值
clf.predict(z)
网格搜索法
GridSearchCV,它存在的意义就是自动调参,只要把参数输进去,就能给出最优化的结果和参数。
常用参数如下:
- estimator选择使用的分类器,并且传入除需要确定最佳的参数之外的其他参数。
- param_grid调优参数,可以是C、gamma、kernel,通常给定一个字典,算法去迭代选择里面的最优值。 param_grid = [{‘kernel’: [‘rbf’], ‘C’: c_range, ‘gamma’: gamma_range}]
- n_jobs=1并行数,int:个数,-1:跟CPU核数一致, 1:默认值
- refit=True默认为True,程序将会以交叉验证训练集得到的最佳参数,重新对所有可用的训练集与开发集进行,作为最终用于性能评估的最佳模型参数。即在搜索参数结束后,用最佳参数结果再次fit一遍全部数据集。
网格搜索完成后,可以从GridSearchCV对象读取以下属性:
- best_score_ : float,best_estimator在交叉验证中的得分
- best_params_ : dict,在保留数据上给出最佳结果的参数设置
- best_index_ : int,对应于最佳候选参数设置的索引(即cv_results_数组中的下标)。
代码
'''
Gesture recognition from muscle (EMG) signals — SVM experiment.

Observed results:
    without standardization: 0.23
    with standardization:    0.25
Best grid-search parameters:
    {'C': 8.0, 'gamma': 0.015625, 'kernel': 'rbf'}
    cross-validation score: 0.8932681242807825
'''
import pandas as pd
from sklearn import svm
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
import csv
import random

# Merge the per-class training files (one CSV per gesture label 0-3).
data1 = pd.read_csv("./train/0.csv")
data2 = pd.read_csv("./train/1.csv")
data3 = pd.read_csv("./train/2.csv")
data4 = pd.read_csv("./train/3.csv")
data = pd.concat([data1, data2, data3, data4], axis=0, ignore_index=True)
# Drop the leftover row-index column written by to_csv.
data = data.drop('Unnamed: 0', axis=1)
# Shuffle the rows, then reset the index: .loc slicing below is label-based,
# so without the reset the 80/20 split would be taken on a scrambled index
# (the MLP version of this script already does this reset).
data = shuffle(data)
data.index = range(data.shape[0])
nums = data.shape[0]
print(nums)
# 80/20 train/test split; columns '0'..'63' are the 64 EMG readings,
# column '64' is the gesture label.
split = int(nums * 0.8)
train_data = data.loc[:split, :'63']
train_label = data.loc[:split, '64']
test_data = data.loc[split:, :'63']
test_label = data.loc[split:, '64']
# Standardize: fit on the training set only, then apply the SAME fitted
# scaler to the test set. Re-fitting a scaler on the test split (as the
# original did) applies mismatched statistics to train and test.
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

if __name__ == "__main__":
    # --- SVM experiment ---
    print("--SVM--")
    '''
    Grid search used to find the parameters below (kept for reference):
    svc = svm.SVC(kernel='rbf', class_weight='balanced', )
    c_range = np.logspace(-5, 15, 11, base=2)
    gamma_range = np.logspace(-9, 3, 13, base=2)
    # grid-search cross-validation parameter ranges, cv=3 -> 3-fold
    param_grid = [{'kernel': ['rbf'], 'C': c_range, 'gamma': gamma_range}]
    grid = GridSearchCV(svc, param_grid, cv=3, n_jobs=-1)
    rbf_svc = grid.fit(train_data, train_label)
    print(rbf_svc.best_params_)
    '''
    # Train with the best parameters found by the grid search.
    rbf_svc = svm.SVC(kernel='rbf', gamma=0.015625, C=8).fit(train_data, train_label)
    # Held-out accuracy: vectorized compare instead of the original
    # index-reset-plus-loop.
    pre_yc = rbf_svc.predict(test_data)
    correctnum = int(np.sum(pre_yc == test_label.values))
    accuracy = correctnum * 1.0 / len(pre_yc)
    print(accuracy)
    # Predict the competition test file (no header row; header=-1 is no
    # longer accepted by pandas, header=None is the documented spelling).
    datatest = pd.read_csv("./test/test.csv", header=None)
    # Reuse the train-fitted scaler — do not fit a fresh one on unseen data.
    datatest = scaler.transform(datatest)
    result = rbf_svc.predict(datatest)
    print(result.shape)
    print(result[1000])
    print(result[0])
    # Write the submission CSV: 1-based row id, predicted label.
    # `with` guarantees the file is closed even on error.
    with open("test.csv", "w", newline='') as file1:
        writer = csv.writer(file1)
        for i in range(len(result)):
            writer.writerow([str(i + 1), result[i]])
(二)MLP
Step 1 调包
导入MLPClassifier包、OneHotEncoder包
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder
Step 2 数据转OneHot
我们将读取的label转置得到列向量:
原始[1,2,3,4]变为[1;2;3;4]
再对其进行fit_transform转换编码得到结果。
enc = OneHotEncoder()
train_label=enc.fit_transform(np.array([train_label]).T)
train_label=train_label.toarray()
test_label=enc.fit_transform(np.array([test_label]).T)
test_label=test_label.toarray()
Step 3 创建MLP分类器
#MLP分类器
clf = MLPClassifier(solver='adam',alpha=1e-5, hidden_layer_sizes=(32,64,32,16), random_state=1,max_iter = 1000,verbose = True,learning_rate_init= 0.001,learning_rate = 'adaptive',n_iter_no_change=300)
参数简介:
- hidden_layer_sizes=(10,10,5) 代表隐含层的层数以及各层神经元个数;
- activation=‘relu’ 激活函数可选{‘identity’, ‘logistic’, ‘tanh’, ‘relu’}
‘identity’, f(x) = x
‘logistic’,f(x) = 1 / (1 + exp(-x)).
‘tanh’,f(x) = tanh(x).
‘relu’, f(x) = max(0, x)
- solver=‘adam’ 优化方式 {‘lbfgs’, ‘sgd’, ‘adam’}, default ‘adam’
- alpha=0.0001 正则化系数
- batch_size default ‘auto’
- learning_rate='constant’学习率模式{‘constant’, ‘invscaling’, ‘adaptive’}, default‘constant’
- learning_rate_init : double, optional, default 0.001The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.
- max_iter=200 最大迭代次数
- tol : 损失限制, default 1e-4
- n_iter_no_change=100 代表如果100次,loss没有相差小于tol时直接结束。
- verbose 是否打印训练过程,default False
Step 4 训练
clf.fit(train_data, train_label)
代码
'''
Gesture recognition from muscle (EMG) signals — MLP experiment.
'''
import pandas as pd
from sklearn import svm
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV, train_test_split
import csv
import random

# Merge the per-class training files (one CSV per gesture label 0-3).
data1 = pd.read_csv("./train/0.csv")
data2 = pd.read_csv("./train/1.csv")
data3 = pd.read_csv("./train/2.csv")
data4 = pd.read_csv("./train/3.csv")
data = pd.concat([data1, data2, data3, data4], axis=0, ignore_index=True)
# Drop the leftover row-index column written by to_csv.
data = data.drop('Unnamed: 0', axis=1)
# Shuffle the rows, then reset the index so the label-based .loc slicing
# below actually splits by position.
data = shuffle(data)
nums = data.shape[0]
nums = int(nums * 0.9)
data.index = range(data.shape[0])
# 90/10 train/test split; columns '0'..'63' are the 64 EMG readings,
# column '64' is the gesture label.
train_data = data.loc[:nums, :'63']
train_label = data.loc[:nums, '64']
test_data = data.loc[nums:, :'63']
test_label = data.loc[nums:, '64']
print(nums)
print(data.shape)
print(train_data.shape)
print(test_data.shape)
# Standardize: fit on the training set only, then apply the SAME fitted
# scaler to the test set. Re-fitting on the test split (as the original
# did) applies mismatched statistics.
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)
# One-hot encode the labels. The encoder fitted on the training labels is
# reused for the test labels so both share the same column order (re-fitting
# on the test split could reorder columns if a class were missing there).
enc = OneHotEncoder()
train_label = enc.fit_transform(np.array([train_label]).T).toarray()
test_label = enc.transform(np.array([test_label]).T).toarray()

if __name__ == "__main__":
    # --- MLP experiment ---
    print("--MLP--")
    # MLP classifier: 4 hidden layers, adaptive learning rate, and a large
    # iteration budget with a loose early-stopping window.
    clf = MLPClassifier(solver='adam', alpha=1e-5,
                        hidden_layer_sizes=(32, 64, 32, 16), random_state=1,
                        max_iter=5000, verbose=True, tol=1e-6,
                        learning_rate_init=0.001, learning_rate='adaptive',
                        n_iter_no_change=500)
    clf.fit(train_data, train_label)
    # Evaluate on the held-out split: a prediction counts as correct only if
    # the entire one-hot row matches (same criterion as the original's
    # element-by-element comparison of all 4 columns).
    pre_yc = clf.predict(test_data)
    print(pre_yc.shape)
    correctnum = sum(
        1 for i in range(pre_yc.shape[0])
        if np.array_equal(pre_yc[i], test_label[i])
    )
    accuracy = correctnum * 1.0 / pre_yc.shape[0]
    print(accuracy)
    # Predict the competition test file (no header row; header=-1 is no
    # longer accepted by pandas, header=None is the documented spelling).
    datatest = pd.read_csv("./test/test.csv", header=None)
    # Reuse the train-fitted scaler — do not fit a fresh one on unseen data.
    datatest = scaler.transform(datatest)
    result = clf.predict(datatest)
    print(result.shape)
    print(result[1000])
    print(result[0])
    # Write the submission CSV: 1-based row id, class index recovered from
    # the one-hot prediction via argmax. `with` guarantees the file closes.
    with open("test_mlp.csv", "w", newline='') as file1:
        writer = csv.writer(file1)
        for i in range(len(result)):
            writer.writerow([str(i + 1), np.argmax(result[i], axis=0)])