Web scraper: fetching options data from CFFEX, SZSE, and SSE

Starting with SZSE (Shenzhen Stock Exchange); the source code follows:

import datetime
import random

import akshare as ak
import pandas as pd
import requests

# Build the list of trading dates to fetch (trading calendar from akshare)
date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']
df1 = pd.DataFrame()

for j in date['trade_date']:
    print(j)
    # SZSE publishes the daily option risk indicators as an xlsx export;
    # the random parameter is just a cache-buster
    url = ('http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx'
           '&CATALOGID=option_hyfxzb&TABKEY=tab1'
           '&txtSearchDate=%(j)s&random=%(r)s' % {'j': j, 'r': random.random()})
    print(url)
    response = requests.get(url)
"""
这一块本人很不满意,先保存再读取,多此一举。主要是因为直接显示发现乱码,本人无论如何都无法解析为正常结果,
只能先放到xlsx,之后重新读取保存。请诸位大虾见到给小弟一点帮助,如何解决。多谢!!!!!!!!!!
""""
    with open('D:/结果存放3.xlsx', 'ab') as file_handle:  
        file_handle.write(response.content)  # 写入
        # file_handle.write('\n')
        df= pd.read_excel('D:/结果存放3.xlsx')
        df['trade_date'] = j
        df1 = df1.append(df)

df1.to_csv('szse.csv')
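
A likely cleaner alternative (a minimal sketch, untested against the live endpoint): pd.read_excel accepts any file-like object, so the downloaded bytes can be parsed in memory with io.BytesIO and the temporary file on D: disappears entirely. The garbling described above usually comes from printing or text-decoding the raw xlsx bytes instead of handing them to an Excel parser.

from io import BytesIO

import pandas as pd
import requests

# sketch: parse the xlsx payload in memory instead of round-tripping via disk;
# url and j are the same variables built inside the loop above
response = requests.get(url)
df = pd.read_excel(BytesIO(response.content))
df['trade_date'] = j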

Scraping SSE (Shanghai Stock Exchange)

import csv
from contextlib import closing

date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']
df1 = pd.DataFrame()
# example URL: http://query.sse.com.cn/derivative/downloadRisk.do?trade_date=20201207&productType=0
for j in date['trade_date']:
    s = datetime.datetime.strptime(j, '%Y-%m-%d').strftime('%Y%m%d')
    url = 'http://query.sse.com.cn/derivative/downloadRisk.do?trade_date=%(d)s&productType=0' % {'d': s}

    # the SSE query host is picky about bare requests; a Referer header
    # pointing at www.sse.com.cn usually avoids rejections
    headers = {'Referer': 'http://www.sse.com.cn/'}

    # stream the CSV payload and decode it line by line (the file is GBK-encoded)
    with closing(requests.get(url, headers=headers, stream=True)) as r:
        f = (line.decode('gbk') for line in r.iter_lines())
        # quotechar must be the double quote; the original ',' was a typo
        reader = csv.reader(f, delimiter=',', quotechar='"')

        for row in reader:
            print(row)
            df1 = pd.concat([df1, pd.DataFrame([row])])

df1.to_csv('sse.csv')
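
If the row-by-row loop feels heavy, pandas can ingest the whole CSV response in one call. A minimal sketch, assuming the same url and headers as above:

from io import BytesIO

# sketch: let pandas parse the entire GBK-encoded CSV in one pass
r = requests.get(url, headers=headers)
df = pd.read_csv(BytesIO(r.content), encoding='gbk')
df1 = pd.concat([df1, df])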

Scraping CFFEX (China Financial Futures Exchange)

from lxml import etree

date = ak.tool_trade_date_hist_sina()
date = date.loc[date['trade_date'] >= '2019-01-01']
df1 = pd.DataFrame()
for j in date['trade_date']:
    s = datetime.datetime.strptime(j, '%Y-%m-%d').strftime('%Y%m')
    s2 = datetime.datetime.strptime(j, '%Y-%m-%d').strftime('%d')
    url = 'http://www.cffex.com.cn/sj/hqsj/rtj/%(YM)s/%(D)s/index.xml?id=39' % {'YM': s, 'D': s2}
    response = requests.get(url)
    # etree.HTML tolerates the XML declaration and lowercases tag names,
    # which is why the XPath below uses all-lowercase tags
    p = etree.HTML(response.content)

    fields = ['instrumentid', 'tradingday', 'openprice', 'highestprice',
              'lowestprice', 'closeprice', 'preopeninterest', 'openinterest',
              'presettlementprice', 'settlementpriceif', 'settlementprice',
              'volume', 'turnover', 'productid', 'delta', 'expiredate']

    df = pd.DataFrame()
    # iterate over the dailydata nodes directly; the original
    # range(1, len(...)) positional XPath skipped the last record
    for i, node in enumerate(p.xpath('//dailydata'), start=1):
        for field in fields:
            text = node.xpath('{}/text()'.format(field))
            # missing tags (e.g. delta on futures rows) default to 0
            df.loc[i, field] = text[0] if text else 0

    df1 = pd.concat([df1, df])

df1.to_csv('cffex.csv')
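
An alternative parsing route (a sketch, assuming the feed is well-formed XML and that the record element is literally named dailydata; adjust the tag name or case to match the actual feed): etree.fromstring plus a dict comprehension avoids per-field XPath entirely.

# sketch: treat the payload as XML rather than HTML
root = etree.fromstring(response.content)
rows = [{child.tag: child.text for child in node} for node in root.iter('dailydata')]
df = pd.DataFrame(rows)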

The code above scrapes options data from the three exchanges. It can be run as-is, or adapted to write the results into a database instead of CSV files.
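
For the database route, pandas' to_sql covers the simple case. A minimal sketch using the standard-library sqlite3 (the filename options.db and table name cffex_daily are placeholders):

import sqlite3

# sketch: persist the scraped frame into a local SQLite database
conn = sqlite3.connect('options.db')
df1.to_sql('cffex_daily', conn, if_exists='append', index=False)
conn.close()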

Reposted from blog.csdn.net/qq_26742269/article/details/111044397