import matplotlib.pylab as plt
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
def to_long_format(signals):
    """Convert a wide frame (one row per window) into tsfresh's long format.

    Args:
        signals: DataFrame whose rows are individual time series and whose
            columns are consecutive samples of that series.

    Returns:
        DataFrame with a ``feature`` column holding every sample in row-major
        order and an ``id`` column holding the positional row number
        (0, 1, 2, ...) of the series each sample came from.
    """
    n_series, series_len = signals.shape
    return pd.DataFrame({
        'feature': signals.to_numpy().ravel(),
        'id': np.repeat(np.arange(n_series), series_len),
    })


def fit_and_report(X_train, X_test, y_train, y_test):
    """Fit a fresh decision tree on the training split and print its report.

    Returns:
        The fitted ``DecisionTreeClassifier``.
    """
    classifier = DecisionTreeClassifier()
    classifier.fit(X_train, y_train)
    print(classification_report(y_test, classifier.predict(X_test)))
    return classifier


def select_relevant_columns(X_train, y_train, labels):
    """Collect the features tsfresh finds relevant for any single class.

    ``select_features`` is run once per label as a one-vs-rest binary problem
    and the selected column names are unioned.

    Returns:
        Sorted list of column names; sorting keeps the column order (and hence
        the fitted tree) independent of set iteration order.
    """
    relevant = set()
    for label in labels:
        filtered = select_features(X_train, y_train == label)
        print("Number of relevant features for class {}: {}/{}".format(label, filtered.shape[1],
                                                                      X_train.shape[1]))
        relevant.update(filtered.columns)
    return sorted(relevant)


def main(n_samples=500):
    """Compare decision trees on raw signals, tsfresh features and selected features.

    Args:
        n_samples: number of leading windows (rows) of the dataset to use.
    """
    # `sep=r'\s+'` replaces the deprecated `delim_whitespace=True`; `.iloc`
    # replaces the removed `.ix` indexer and the removed `squeeze=True` option.
    signals = pd.read_csv('UCI HAR Dataset/train/Inertial Signals/body_acc_x_train.txt',
                          sep=r'\s+', header=None).iloc[:n_samples]
    y = pd.read_csv('UCI HAR Dataset/train/y_train.txt',
                    sep=r'\s+', header=None).iloc[:n_samples, 0]

    # To inspect a single window:
    # plt.title('accelerometer reading')
    # plt.plot(signals.iloc[0])
    # plt.show()

    # With time-series feature engineering.
    X = extract_features(timeseries_container=to_long_format(signals), n_jobs=0, column_id='id',
                         impute_function=impute,
                         default_fc_parameters=ComprehensiveFCParameters())
    feat_train, feat_test, y_feat_train, y_feat_test = train_test_split(X, y, test_size=0.2)
    fit_and_report(feat_train, feat_test, y_feat_train, y_feat_test)

    # Without time-series feature engineering (raw samples as features).
    # Separate names are used so this split does not clobber the feature split
    # that the selection step below depends on.
    raw_train, raw_test, y_raw_train, y_raw_test = train_test_split(signals, y, test_size=.2)
    fit_and_report(raw_train, raw_test, y_raw_train, y_raw_test)

    # Feature selection on the extracted features, then refit on the subset.
    columns = select_relevant_columns(feat_train, y_feat_train, y.unique())
    if not columns:
        # A tree cannot be fitted on zero columns; report instead of crashing.
        print("No relevant features were selected; skipping filtered model.")
        return
    fit_and_report(feat_train[columns], feat_test[columns], y_feat_train, y_feat_test)


if __name__ == '__main__':
    main()
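# Automatic time-series feature extraction with tsfresh (2)
# You may also like
# Reposted from blog.csdn.net/fang156239305/article/details/103931043
# Today's recommendations
# Weekly ranking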