import numpy as np
import pandas as pd
from sklearn import svm
from sklearn import cross_validation
import tushare as ts
from sklearn.linear_model import LogisticRegression
# Pull daily K-line (OHLCV) bars for stock 600000 over the study window,
# then peek at the first rows to sanity-check the download.
stock_code = '600000'
data = ts.get_k_data(stock_code, start='2015-01-01', end='2018-04-12')
data.head(10)
         date    open   close    high     low     volume    code
0  2015-01-05  10.178  10.300  10.415   9.973  5135687.0  600000
1  2015-01-06  10.255  10.338  10.691  10.140  5116845.0  600000
2  2015-01-07  10.191  10.133  10.364   9.954  3857168.0  600000
3  2015-01-08  10.172   9.774  10.178   9.742  3306271.0  600000
4  2015-01-09   9.742   9.890  10.415   9.685  4919999.0  600000
5  2015-01-12   9.935   9.755  10.069   9.582  3677872.0  600000
6  2015-01-13   9.697   9.730   9.877   9.678  1784049.0  600000
7  2015-01-14   9.813   9.928  10.120   9.813  3090188.0  600000
8  2015-01-15   9.928  10.332  10.351   9.864  3298268.0  600000
9  2015-01-16  10.409  10.556  10.736  10.358  4802458.0  600000
# Build classification labels from daily open/close prices:
# label 1 = the day closed at or below its open (down/flat day),
# label 0 = the day closed above its open (up day).
data_open = data['open']
data_close = data['close']
num_x = len(data)
y = [1 if o >= c else 0 for o, c in zip(data_open, data_close)]

# Feature matrix: columns 1..5 of the frame = open, close, high, low,
# volume (column 0 is the date string, column 6 the stock code).
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the
# long-supported equivalent and also works on old pandas versions.
x_data = data.values
x = x_data[:, 1:6]
data_shape = x.shape
data_rows = data_shape[0]
data_cols = data_shape[1]
# Per-column extremes, used below for min-max scaling.
data_col_max = x.max(axis=0)
data_col_min = x.min(axis=0)
print(data_col_max, data_col_min)
(array([13.67, 13.76, 14.02, 13.53, 8379504.0], dtype=object), array([8.412, 8.311, 8.774, 7.989, 79967.0], dtype=object))
# Min-max scale each feature column into [0, 1] so the volume column
# (~1e6) does not dominate the price columns (~10) in the classifiers.
# xrange is Python-2-only (NameError on Python 3); range is portable.
for j in range(data_cols):
    # Hoist the per-column min and span out of the inner loop.
    col_min = data_col_min[j]
    col_span = data_col_max[j] - col_min  # assumes max > min per column — TODO confirm no constant column
    for i in range(data_rows):
        x[i][j] = (x[i][j] - col_min) / col_span
print(x[0:2])
[[0.33586915176873344 0.3650211047898699 0.31280975981700343
0.35805811225410594 0.6091568722448011]
[0.35051350323316854 0.3719948614424664 0.36542127335112495
0.388197076340011 0.6068866251213773]]
# Compare an RBF-kernel SVM against logistic regression on 5 independent
# random 80/20 train/test splits and report per-split accuracy.
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection. Imported locally
# so this cell is self-contained.
from sklearn.model_selection import train_test_split

clf1 = svm.SVC(kernel='rbf')
clf2 = LogisticRegression()
result1 = []
result2 = []
for i in range(5):
    x_train, x_test, y_train, y_test = \
        train_test_split(x, y, test_size=0.2)
    clf1.fit(x_train, y_train)
    # Accuracy = mean of elementwise label agreement on the test split.
    result1.append(np.mean(y_test == clf1.predict(x_test)))
    clf2.fit(x_train, y_train)
    result2.append(np.mean(y_test == clf2.predict(x_test)))
print("svm classifier accuracy:")
print(result1)
print("LogisticRegression classifier accuracy:")
print(result2)
svm classifier accuracy:
[0.49673202614379086, 0.5490196078431373, 0.49673202614379086, 0.42483660130718953, 0.49673202614379086]
LogisticRegression classifier accuracy:
[0.67320261437908502, 0.62091503267973858, 0.69281045751633985, 0.84313725490196079, 0.72549019607843135]