完整的相似度代码

# -*- coding:utf-8 -*-

import pandas as pd
import pymysql
from sqlalchemy import create_engine
from math import sqrt
# Raw DBAPI connection used to read the source table. The explicit charset
# keeps Chinese text from being garbled.
# NOTE(review): credentials are hard-coded here and in the engine URL below;
# consider moving them to configuration.
dbconn = pymysql.connect(
    host="127.0.0.1",
    database="fight",
    user="root",
    password="111111",
    port=3306,
    charset='utf8')

# SQLAlchemy engine used further down to write the results back. It points at
# the same schema ("fight") as dbconn and uses the PyMySQL driver, which is
# the MySQL driver this file actually imports.
conn = create_engine('mysql+pymysql://root:111111@localhost:3306/fight?charset=utf8')

# Load the whole pattern table, then release the raw connection: nothing later
# in this script reads through it again.
sqlcmd = "select * from shape_mode_k7f3"
try:
    data = pd.read_sql(sqlcmd, dbconn)
finally:
    dbconn.close()

# Select the reference row (one security on one date) that every other row is
# compared against. The result stays a DataFrame rather than a Series.
date = '2018-03-29'
code = 'SZ000975'
x = data.loc[(data['secode'] == code) & (data['mkdate'] == date)]
if x.empty:
    # Fail here with a clear message instead of a bare IndexError later on.
    raise ValueError('no row found for secode=%s, mkdate=%s' % (code, date))

# Scalar square-root helper: math.sqrt cannot be applied to a whole pandas
# Series, so callers apply this function value by value.
def calc_sqrt(num):
    """Return the square root of the single number *num*."""
    root = sqrt(num)
    return root

# Shared parameters of the distance formulas, named once so the numbers do
# not have to be edited in every expression.
# t: exponent applied to each coordinate difference (Euclidean distance).
# r: axis argument handed to DataFrame.apply (1 = row by row).
t, r = 2, 1
# Weights of the body, upper-shadow and lower-shadow distances when they are
# blended into one per-K-line distance.
w, ww, www = 0.5, 0.3, 0.2

# Per-K-line distances between every row of `data` and the reference row `x`.
#
# For each K-line k = 1..7 three distances are stored as new columns of `data`:
#   juli_oc<k>   - body:         columns _r_Fop_<k> and _r_Fcp_<k>
#   juli_hmax<k> - upper shadow: columns _r_Fhp_<k> and _r_maxco_<k>
#   juli_lmin<k> - lower shadow: columns _r_Flp_<k> and _r_minco_<k>
# They are then blended into juli_<k> using the weights w, ww and www.
def _pair_distance(frame, ref, first_col, second_col):
    """Return each row's distance to the reference row over two columns.

    frame: DataFrame holding all candidate rows.
    ref: DataFrame whose first row supplies the reference values.
    first_col, second_col: names of the two columns that form one point.

    The result is a Series aligned with ``frame``. Raises IndexError when
    ``ref`` has no rows.
    """
    first_ref = float(ref[first_col].iloc[0])
    second_ref = float(ref[second_col].iloc[0])
    powered = pow(frame[first_col] - first_ref, t) + pow(frame[second_col] - second_ref, t)
    # Vectorized square root of the whole column at once. With the even
    # exponent t = 2 the sum is never negative.
    return powered ** 0.5


for k in range(1, 8):
    # Body similarity.
    data['juli_oc%d' % k] = _pair_distance(data, x, '_r_Fop_%d' % k, '_r_Fcp_%d' % k)
    # Upper-shadow similarity.
    data['juli_hmax%d' % k] = _pair_distance(data, x, '_r_Fhp_%d' % k, '_r_maxco_%d' % k)
    # Lower-shadow similarity.
    data['juli_lmin%d' % k] = _pair_distance(data, x, '_r_Flp_%d' % k, '_r_minco_%d' % k)
    # Overall distance of this K-line: weighted blend of the three parts.
    data['juli_%d' % k] = (data['juli_oc%d' % k] * w
                           + data['juli_hmax%d' % k] * ww
                           + data['juli_lmin%d' % k] * www)

# Overall K-line similarity distance: weighted mean of the seven per-K-line
# distances. K-line 7 carries the largest weight (27), K-line 1 the smallest
# (21); the divisor 168 is the sum of those weights.
data['simkcoef'] = (
    data['juli_7'] * 27 + data['juli_6'] * 26 + data['juli_5'] * 25
    + data['juli_4'] * 24 + data['juli_3'] * 23 + data['juli_2'] * 22
    + data['juli_1'] * 21
) / 168

# Keep only the identifying columns and the score. DataFrame.ix was removed in
# pandas 1.0; label-based .loc selects the same columns.
similar = data.loc[:, ['mkdate', 'secode', 'simkcoef']]

# Append the scores to the "similar" table for later querying and sorting.
# The DataFrame index is written as well (to_sql's default), as before.
similar.to_sql("similar", con=conn, if_exists='append')

猜你喜欢

转载自blog.csdn.net/sphinxrascal168/article/details/80006697