最新大大

https://www.cnblogs.com/wzj998/p/7278630.html

#-*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import tushare as ts
import pandas as pd
import lxml
import time
import datetime
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy import create_engine

'''
股票基本信息获取模块
'''
# Fetch listed-stock basics via the Tushare stock_basic API and reload the stock_list table.
def get_security_info():
    """Full refresh of stock_list: TS code, symbol, name, industry, market, exchange, list date."""
    # Fields: TS code, symbol, short name, industry, market board, exchange, listing date.
    stock_list = pro.stock_basic(list_status='L',
                                 fields='ts_code,symbol,name,industry,market,exchange,list_date')
    cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
    # Clear old rows before re-inserting (fixed SQL typo: 'delect' -> 'delete').
    cursor.execute('delete from stock_list')
    conn.commit()
    for _, row in stock_list.iterrows():
        # list_date arrives as a 'YYYYMMDD' string; parse it for the DATE column
        # (fixed: a smart quote broke the original format-string literal).
        list_date = datetime.datetime.strptime(row['list_date'], '%Y%m%d')
        # Parameterized insert; fixed: parameters are passed as a tuple argument,
        # not spliced in with the `%` operator (that was a syntax error).
        cursor.execute(
            'insert into stock.stock_list(ts_code,symbol,name,industry,market,exchange,list_date)'
            ' values (%s, %s, %s, %s, %s, %s, %s)',
            (row['ts_code'], row['symbol'], row['name'], row['industry'],
             row['market'], row['exchange'], list_date))
    conn.commit()
        
# Fetch company profiles (SSE + SZSE) via the Tushare stock_company API and reload company_list.
def _insert_company_rows(company_df):
    """Insert one exchange's company rows into stock.company_list and commit."""
    cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
    for _, row in company_df.iterrows():
        # Parameterized insert; fixed: parameters passed as a tuple, not with `%`.
        cursor.execute(
            'insert into stock.company_list(ts_code,province,city,employees,main_business)'
            ' values (%s, %s, %s, %s, %s)',
            (row['ts_code'], row['province'], row['city'],
             row['employees'], row['main_business']))
    conn.commit()

def get_company_info():
    """Full refresh of company_list: TS code, province, city, headcount, main business."""
    # SSE = Shanghai exchange, SZSE = Shenzhen exchange.
    company_list1 = pro.stock_company(exchange='SSE',
                                      fields='ts_code,province,city,employees,main_business')
    company_list2 = pro.stock_company(exchange='SZSE',
                                      fields='ts_code,province,city,employees,main_business')
    cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
    # Fixed two bugs: 'delect' -> 'delete', and the original cleared stock_list
    # even though every insert below targets company_list.
    cursor.execute('delete from company_list')
    conn.commit()
    # The two per-exchange insert loops were identical copy-paste; share one helper.
    _insert_company_rows(company_list1)
    _insert_company_rows(company_list2)

# Fetch concept (theme) membership per stock via the Tushare concept_detail API.
def get_concept_info():
    """Reload concept-membership rows: TS code + concept name for every listed stock."""
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
    # NOTE(review): this clears stock_list while the inserts below also target
    # stock_list(ts_code,concept_name) — verify the intended table (a dedicated
    # concept table seems more likely); table names kept as in the original.
    cursor.execute('delete from stock_list')  # fixed SQL typo: 'delect' -> 'delete'
    conn.commit()
    for _, stk_row in stk.iterrows():
        # Fixed: the original read stk['ts_code'] (the whole column) instead of the row value.
        ts_code = stk_row['ts_code']
        concept_list = pro.concept_detail(ts_code=ts_code, fields='ts_code,concept_name')
        cursor = conn.cursor()
        # Fixed: concept_name was an undefined name; iterate the API result instead.
        for _, row in concept_list.iterrows():
            cursor.execute(
                'insert into stock.stock_list(ts_code,concept_name) values (%s, %s)',
                (row['ts_code'], row['concept_name']))
        conn.commit()

# Fetch shareholder counts via stk_holdernumber (max 3000 rows/call, 100 calls/min).
def get_stk_holder():
    """Insert shareholder-count records (TS code, announcement date, holder count) per stock."""
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    for _, stk_row in stk.iterrows():
        # Fixed: the original read stk['ts_code'] (the whole column) instead of the row value.
        ts_code = stk_row['ts_code']
        stk_holder = pro.stk_holdernumber(ts_code=ts_code,
                                          fields='ts_code,ann_date,holder_num')
        cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
        # Fixed: ann_date/holder_num were undefined names; iterate the API result.
        # NOTE(review): target table stock.stock_list kept from the original —
        # verify it; a dedicated holder table seems more likely.
        for _, row in stk_holder.iterrows():
            cursor.execute(
                'insert into stock.stock_list(ts_code,ann_date,holder_num) values (%s, %s, %s)',
                (row['ts_code'], row['ann_date'], row['holder_num']))
        conn.commit()
        # Throttle to respect the 100-calls-per-minute API limit.
        time.sleep(2)

'''
股票每日行情数据获取模块
'''
# Fetch daily OHLC quotes via the Tushare daily API (max 200 calls/min, 4000 rows/call).
def get_daily_info():
    """Insert daily quote rows (date, TS code, OHLC, prev close, pct change, amount) per stock."""
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    for _, stk_row in stk.iterrows():
        ts_code = stk_row['ts_code']
        daily_info = pro.daily(ts_code=ts_code,
                               fields='trade_date,ts_code,open,high,low,close,pre_close,pct_chg,amount')
        cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
        # Fixed two bugs: the inner loop iterated `stk` (which has none of these
        # columns), and the insert referenced an undefined concept_name.
        # NOTE(review): target table guessed as stock.daily_info — confirm the schema.
        for _, row in daily_info.iterrows():
            cursor.execute(
                'insert into stock.daily_info(trade_date,ts_code,open,high,low,close,pre_close,pct_chg,amount)'
                ' values (%s, %s, %s, %s, %s, %s, %s, %s, %s)',
                (row['trade_date'], row['ts_code'], row['open'], row['high'], row['low'],
                 row['close'], row['pre_close'], row['pct_chg'], row['amount']))
        conn.commit()
        
# Scrape Sina Finance "stock radar" unusual-movement pages and store each row.
def sina_stock_radar_Spider():
    """Crawl pages 1-15 of the Sina stock-radar list and insert each movement record."""
    for page_num in range(1, 16):
        url = 'http://finance.sina.com.cn/stockradar/stockradar' + str(page_num) + '.html'
        headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"}

        html = requests.get(url=url, headers=headers)
        # Let requests detect the page encoding (Sina pages are not UTF-8 by default).
        html.encoding = html.apparent_encoding

        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        # Fixed: the loop iterated an undefined name tr_list1.
        for index, tr in enumerate(tr_list):
            if index != 0:  # skip the table header row
                th_list = tr.find_all('th')
                change_time = th_list[0].string
                symbol = th_list[1].string
                name = th_list[2].string
                change_value = th_list[3].string
                # The page shows intraday data only; stamp today's date.
                trade_date = time.strftime("%Y%m%d", time.localtime())
                cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
                # Fixed: parameters passed as a tuple, not with `%` (syntax error).
                cursor.execute(
                    'insert into stock.stock_list(symbol,name,trade_date,change_time,change_value)'
                    ' values (%s, %s, %s, %s, %s)',
                    (symbol, name, trade_date, change_time, change_value))
                conn.commit()

# Fetch "dragon-tiger list" top institutional trades via top_inst (max 10000 rows/call).
def get_lhb_info(trade_date='20180928'):
    """Insert top-institution trade rows for `trade_date`.

    Generalized: the date was hard-coded; it is now a parameter whose default
    keeps the original value for backward compatibility.
    """
    # Fields: trade date, TS code, branch name, buy amount, buy ratio,
    # sell amount, sell ratio, net buy amount (all in 10k CNY).
    lhb_list = pro.top_inst(trade_date=trade_date,
                            fields='trade_date,ts_code,exalter,buy,buy_rate,sell,sell_rate,net_buy')
    cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
    # Fixed two bugs: the loop iterated an undefined name `stk`, and the insert
    # referenced an undefined concept_name instead of the fetched fields.
    # NOTE(review): target table guessed as stock.lhb_list — confirm the schema.
    for _, row in lhb_list.iterrows():
        cursor.execute(
            'insert into stock.lhb_list(trade_date,ts_code,exalter,buy,buy_rate,sell,sell_rate,net_buy)'
            ' values (%s, %s, %s, %s, %s, %s, %s, %s)',
            (row['trade_date'], row['ts_code'], row['exalter'], row['buy'],
             row['buy_rate'], row['sell'], row['sell_rate'], row['net_buy']))
    conn.commit()

# Fetch limit-up/limit-down data via limit_list (max 1000 rows/call).
def get_zdt_info(trade_date=None):
    """Insert limit-up/limit-down rows for `trade_date` (defaults to today, YYYYMMDD).

    Fields: trade date, TS code, name, close, order-queue amount, queue/turnover
    ratio, first/last limit time, times opened, limit strength, D=down/U=up flag.
    """
    # Fixed: `trade_date=` with no value was a syntax error; default to today.
    if trade_date is None:
        trade_date = time.strftime('%Y%m%d', time.localtime())
    zdt_info = pro.limit_list(trade_date=trade_date,
                              fields='trade_date,ts_code,name,close,fd_amount,fc_ratio,first_time,last_time,open_times,strth,limit')
    cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
    # Fixed: loop iterated an undefined name `stk`; row[''] -> row['close']; and
    # the insert referenced an undefined concept_name instead of the fetched fields.
    # `limit` is a MySQL reserved word, so it is backtick-quoted in the statement.
    # NOTE(review): target table guessed as stock.zdt_info — confirm the schema.
    for _, row in zdt_info.iterrows():
        cursor.execute(
            'insert into stock.zdt_info(trade_date,ts_code,name,close,fd_amount,fc_ratio,first_time,last_time,open_times,strth,`limit`)'
            ' values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
            (row['trade_date'], row['ts_code'], row['name'], row['close'],
             row['fd_amount'], row['fc_ratio'], row['first_time'], row['last_time'],
             row['open_times'], row['strth'], row['limit']))
    conn.commit()

# Fetch per-stock money-flow data via the moneyflow API (max 4000 rows/call).
def moneyflow(trade_date=None):
    """Insert per-stock money-flow rows for `trade_date` (defaults to today, YYYYMMDD).

    Fields: trade date, TS code, large buy/sell amount, extra-large buy/sell
    amount, net inflow (all in 10k CNY).
    """
    # Fixed: `trade_date=` with no value was a syntax error; default to today.
    if trade_date is None:
        trade_date = time.strftime('%Y%m%d', time.localtime())
    stk = pro.stock_basic(list_status='L', fields='ts_code')
    for _, stk_row in stk.iterrows():
        ts_code = stk_row['ts_code']
        # Local renamed `flow` so it no longer shadows this function's name.
        flow = pro.moneyflow(ts_code=ts_code, trade_date=trade_date,
                             fields='trade_date,ts_code,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount')
        cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
        # Fixed: the inner loop iterated `stk` (which has none of these columns)
        # and the insert referenced an undefined concept_name.
        # NOTE(review): target table guessed as stock.moneyflow — confirm the schema.
        for _, row in flow.iterrows():
            cursor.execute(
                'insert into stock.moneyflow(trade_date,ts_code,buy_lg_amount,sell_lg_amount,buy_elg_amount,sell_elg_amount,net_mf_amount)'
                ' values (%s, %s, %s, %s, %s, %s, %s)',
                (row['trade_date'], row['ts_code'], row['buy_lg_amount'],
                 row['sell_lg_amount'], row['buy_elg_amount'],
                 row['sell_elg_amount'], row['net_mf_amount']))
        conn.commit()
'''
大盘风控数据获取模块
'''

# Scrape Sina Finance real-time large-order ("big deal") pages and store each row.
def spider():
    """Crawl the Sina big-deal ranking pages and insert each record."""
    for page in range(0, 3000):
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_bill_all.php?num=100&sort=ticktime&asc=0&volume=10000&type=0&' + str(page)
        headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"}
        html = requests.get(url=url, headers=headers)
        # Let requests detect the page encoding (Sina pages are not UTF-8 by default).
        html.encoding = html.apparent_encoding

        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        # Fixed: the loop iterated an undefined name tr_list1.
        for index, tr in enumerate(tr_list):
            if index != 0:  # skip the table header row
                th_list = tr.find_all('th')
                change_time = th_list[2].string
                symbol = th_list[1].string
                name = th_list[0].string
                change_value = th_list[3].string
                # Dropped the unused `cjj` and the incomplete `cjl =` line,
                # which was a syntax error in the original.
                trade_date = time.strftime("%Y%m%d", time.localtime())
                cursor = conn.cursor()  # fixed: conn.cursorsor() does not exist
                # Fixed: parameters passed as a tuple, not with `%` (syntax error).
                cursor.execute(
                    'insert into stock.stock_list(symbol,name,trade_date,change_time,change_value)'
                    ' values (%s, %s, %s, %s, %s)',
                    (symbol, name, trade_date, change_time, change_value))
                conn.commit()
    
'''
市场资讯信息获取模块
'''

'''
个股风控数据获取模块
'''
       
# Entry point: wire up the Tushare client and the MySQL connection, then run the loaders.
if __name__=='__main__':
    # NOTE(review): hard-coded API token and DB credentials — move to config/env vars.
    ts.set_token('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    pro = ts.pro_api()
    # engine = create_engine('mysql://root:[email protected]/stock?charset=utf8')
    # Open the database connection used (as a module global) by every loader.
    # Fixed: `port` must be an int, and pymysql's charset name is 'utf8' (no dash).
    # Also dropped the unused module-level cursor; each loader opens its own.
    conn = pymysql.connect(host="127.0.0.1", port=3306, user='root',
                           password='123456', database='stock_data', charset="utf8")
    # Load stock basics.
    get_security_info()
    # Load company profiles.
    get_company_info()
    # Scrape Sina stock-radar movement data.
    sina_stock_radar_Spider()

    # Always release the connection when done.
    conn.close()
    
    
    
#-*- coding:utf-8 -*-

import requests
from bs4 import BeautifulSoup
import time
import tushare as ts
import pandas as pd
import lxml

def spider():
    """Crawl the Sina big-deal ranking pages and print the scraped cells (debug version)."""
    for page in range(0, 3000):
        url = 'http://vip.stock.finance.sina.com.cn/quotes_service/view/cn_bill_all.php?num=100&sort=ticktime&asc=0&volume=10000&type=0&' + str(
            page)
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"}
        html = requests.get(url=url, headers=headers)
        # Let requests detect the page encoding (Sina pages are not UTF-8 by default).
        html.encoding = html.apparent_encoding

        soup = BeautifulSoup(html.text, 'lxml')
        tr_list = soup.find_all('tr')
        for index, tr in enumerate(tr_list):
            if index != 0:  # skip the table header row
                th_list = tr.find_all('th')
                print(th_list)
                print(type(th_list))

                td_list = tr.find_all('td')
                # Fixed: `.sting` is not a Tag attribute (BeautifulSoup's attribute
                # lookup silently returned None); `.string` is the cell text.
                # NOTE(review): tr_list[3] indexes the page's 4th <tr>, not a cell
                # of the current row — td_list[...] looks more likely; confirm.
                value = tr_list[3].string
                print(value)
                # (Removed the dead commented-out scraping experiments that followed.)
# Run the standalone debug spider when this script is executed directly.
if __name__=='__main__':
    spider()

猜你喜欢

转载自www.cnblogs.com/Iceredtea/p/11735976.html