
# -*- coding: utf-8 -*-
import pandas as pd
import tushare as ts
import pymysql
import hashlib
import datetime
import time
import requests
import json
from sqlalchemy import create_engine
from redis import Redis
from concurrent.futures import ThreadPoolExecutor
import threading

#==================== Crawl Kaipanla (开盘啦) PC-client plate data ====================
def kplspider(code_list, cur_date):
    # Create an empty DataFrame for storing stock tags
    stock_tag = pd.DataFrame(columns=('symbol', 'tag', 'in_date'))

    # Create an empty DataFrame for storing stock concepts
    stock_concept = pd.DataFrame(columns=('symbol', 'concept', 'in_date'))

    url = 'https://pchq.kaipanla.com/w1/api/index.php'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0'}

    for code in code_list:
        data = {
            'c': 'PCArrangeData',
            'a': 'GetHQPlate',
            'StockID': code,
            'Day': cur_date,
            'SelType': '1,2,3,8,9,5,6,7',
            'UserID': 399083,
            'Token': '71aef0e806e61ad3169ddc9473e37886'
        }
        session = requests.Session()
        html = session.post(url=url, data=data, headers=headers, verify=False).text

        # Parse the Kaipanla tag of the stock
        tag = json.loads(html)["pankou"]["tag"]
        stock_tag = stock_tag.append({'symbol':code,'tag':tag,'in_date':cur_date},ignore_index=True)
        print(stock_tag)
        # Parse the Kaipanla concepts the stock belongs to
        cept_list = json.loads(html)["stockplate"]
        try:
            for cept in cept_list:
                stock_concept = stock_concept.append({'symbol':code, 'concept':cept[0], 'in_date':cur_date},ignore_index=True)
                print(stock_concept)
        except:
            pass
        
    # Create the Pandas read/write database engine
    engine_mysql = create_engine('mysql://root:[email protected]/stock?charset=utf8')

    # Read the existing records from the database with Pandas
    old_tag_list = pd.read_sql_query(sql='select * from stock.stock_tag', con=engine_mysql, index_col=None)
    old_concept_list = pd.read_sql_query(sql='select * from stock.stock_concept', con=engine_mysql, index_col=None)
    #print(old_tag_list)
    #print(old_concept_list)

    # Merge the new and old data
    tag_list = pd.concat([old_tag_list, stock_tag], ignore_index=True)
    concept_list = pd.concat([old_concept_list, stock_concept], ignore_index=True)
    #print(tag_list)
    #print(concept_list)

    # Drop all duplicated rows
    tag_list = tag_list.drop_duplicates(keep=False)
    concept_list = concept_list.drop_duplicates(keep=False)
    #print(tag_list)
    #print(concept_list)

    # Write the results back to the database with Pandas
    tag_list.to_sql('stock_tag', engine_mysql, index=False, if_exists='append')
    concept_list.to_sql('stock_concept', engine_mysql, index=False, if_exists='append')

    print("Stored %s new tag rows" % stock_tag.shape[0])
    print("Stored %s new concept rows" % stock_concept.shape[0])

#==================== main function ====================
if __name__ == '__main__':
    print("Stock list program started")
    start = time.time()

    # Initialize the tushare.pro API
    pro = ts.pro_api('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')

    # Get the current date
    cur_date = time.strftime("%Y%m%d", time.localtime())

    # Create a thread pool with 2 worker threads (created here but not used below)
    pool = ThreadPoolExecutor(max_workers=2)

    # Create the Pandas read/write database engine
    engine_mysql = create_engine('mysql://root:[email protected]/stock?charset=utf8')

    # L means listed and trading normally, P means listing suspended
    l_list = pro.stock_basic(list_status='L', fields='ts_code,symbol,name,area,exchange,list_status,list_date')
    p_list = pro.stock_basic(list_status='P', fields='ts_code,symbol,name,area,exchange,list_status,list_date')

    # Merge the normally listed and suspended stocks
    stock_list = pd.concat([l_list, p_list], axis=0, ignore_index=True)

    # Create an empty list and collect the stock codes
    code_list = []
    for index, row in stock_list.iterrows():
        symbol = row['symbol']
        code_list.append(symbol)

    kplspider(code_list, cur_date)
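
    # Note: the ThreadPoolExecutor created above is never submitted to in the original
    # script; kplspider is called directly. A minimal sketch of how the pool could be
    # used instead (the chunk size of 500 is an arbitrary assumption):
    #
    #   chunks = [code_list[i:i + 500] for i in range(0, len(code_list), 500)]
    #   futures = [pool.submit(kplspider, chunk, cur_date) for chunk in chunks]
    #   for f in futures:
    #       f.result()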

    end = time.time()
    print('Stock list program ran for %0.2f seconds.' % (end - start))
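
# Note: the read_sql_query calls in kplspider assume that the stock.stock_tag and
# stock.stock_concept tables already exist in MySQL, with columns matching the
# DataFrames written back via to_sql (symbol / tag / in_date and
# symbol / concept / in_date respectively); otherwise the initial reads will fail.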


Origin www.cnblogs.com/Iceredtea/p/12159175.html