A simple prediction of stock price rises and falls using SVM and logistic regression

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn import cross_validation
import tushare as ts
from sklearn.linear_model import LogisticRegression
# Fetch the historical K-line data for Shanghai Pudong Development Bank
# (stock code 600000) through the tushare API.
data = ts.get_k_data(
    '600000',
    start='2015-01-01',
    end='2018-04-12',
)
data.head(10)
date open close high low volume code
0 2015-01-05 10.178 10.300 10.415 9.973 5135687.0 600000
1 2015-01-06 10.255 10.338 10.691 10.140 5116845.0 600000
2 2015-01-07 10.191 10.133 10.364 9.954 3857168.0 600000
3 2015-01-08 10.172 9.774 10.178 9.742 3306271.0 600000
4 2015-01-09 9.742 9.890 10.415 9.685 4919999.0 600000
5 2015-01-12 9.935 9.755 10.069 9.582 3677872.0 600000
6 2015-01-13 9.697 9.730 9.877 9.678 1784049.0 600000
7 2015-01-14 9.813 9.928 10.120 9.813 3090188.0 600000
8 2015-01-15 9.928 10.332 10.351 9.864 3298268.0 600000
9 2015-01-16 10.409 10.556 10.736 10.358 4802458.0 600000
# Build the labels (y) and the feature matrix (x) from the K-line frame.
# Label convention: 1 when the day's open is at or above its close,
# 0 when the close is above the open.
data_open = data['open']
data_close = data['close']
num_x = len(data)
# Vectorised comparison instead of a per-row data_open[i] lookup, so the
# labels do not depend on the frame carrying a default 0..n-1 index.
y = (data_open >= data_close).astype(int).tolist()

# DataFrame.as_matrix() was deprecated and then removed from pandas;
# .values is available in both old and new releases.
x_data = data.values
# Columns 1..5 are open, close, high, low, volume (per the head() listing);
# the date and code columns are dropped. Cast to float so x is a numeric
# array rather than dtype=object.
x = x_data[:, 1:6].astype(float)  # x and y are both ready at this point
# NOTE(review): x contains the same-day open and close that y is derived
# from, so a classifier trained on x sees the answer in its inputs.
data_shape = x.shape
data_rows = data_shape[0]
data_cols = data_shape[1]
# Per-column extrema, needed for min-max scaling of x.
data_col_max = x.max(axis=0)
data_col_min = x.min(axis=0)
print(data_col_max,data_col_min)
(array([13.67, 13.76, 14.02, 13.53, 8379504.0], dtype=object), array([8.412, 8.311, 8.774, 7.989, 79967.0], dtype=object))

# Min-max scale every column of x into [0, 1], writing the result back into
# the existing array. NumPy broadcasting replaces the nested xrange loops
# (xrange does not exist on Python 3).
data_col_range = data_col_max - data_col_min
# A constant column has zero range; divide by 1 instead so it maps to 0
# rather than failing on a division by zero.
data_col_range[data_col_range == 0] = 1
x[:] = (x - data_col_min) / data_col_range
print(x[0:2])
[[0.33586915176873344 0.3650211047898699 0.31280975981700343
  0.35805811225410594 0.6091568722448011]
 [0.35051350323316854 0.3719948614424664 0.36542127335112495
  0.388197076340011 0.6068866251213773]]
# Train an RBF-kernel SVM and a logistic regression on five independent
# random 80/20 splits of (x, y) and record each model's test accuracy.

# sklearn.cross_validation was removed in scikit-learn 0.20; train_test_split
# now lives in sklearn.model_selection. Fall back only for very old releases.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

clf1 = svm.SVC(kernel='rbf')
clf2 = LogisticRegression()
result1 = []
result2 = []
for _ in range(5):
    # Split x and y into training and test sets; 20% is held out for testing.
    x_train, x_test, y_train, y_test = \
        train_test_split(x, y, test_size=0.2)
    # Convert once so the comparisons below are explicit element-wise
    # array comparisons rather than relying on list == ndarray coercion.
    y_true = np.asarray(y_test)
    # Fit on the training split.
    clf1.fit(x_train, y_train)
    # Accuracy = fraction of test predictions that match the true labels.
    result1.append(np.mean(y_true == clf1.predict(x_test)))
    clf2.fit(x_train, y_train)
    result2.append(np.mean(y_true == clf2.predict(x_test)))
print("svm classifier accuacy:")
print(result1)
print("LogisticRegression classifier accuacy:")
print(result2)
svm classifier accuacy:
[0.49673202614379086, 0.5490196078431373, 0.49673202614379086, 0.42483660130718953, 0.49673202614379086]
LogisticRegression classifier accuacy:
[0.67320261437908502, 0.62091503267973858, 0.69281045751633985, 0.84313725490196079, 0.72549019607843135]

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325762660&siteId=291194637