雪球股票信息爬取并存入 JSON 和 CSV 文件

import csv
import json
from urllib import request

# URL template for one page of the Xueqiu quote listing. The single `{}`
# placeholder is filled with the page number via `base_url.format(page)`.
# The literal is split across adjacent strings purely for readability; the
# pieces concatenate to one URL with no separators added.
base_url = (
    'https://xueqiu.com/stock/quote_order.json'
    '?page={}&size=30&order=desc&exchange=CN&stockType=sha'
    '&column=symbol%2Cname%2Ccurrent%2Cchg%2Cpercent%2Clast_close'
    '%2Copen%2Chigh%2Clow%2Cvolume%2Camount%2Cmarket_capital'
    '%2Cpe_ttm%2Chigh52w%2Clow52w%2Chasexist'
    '&orderBy=percent&_=1530842809515'
)

# HTTP headers sent with every request.
# NOTE(review): the Cookie value looks like a browser session captured at
# the time of writing; it is presumably expired by now and should not live
# in source control -- confirm and move it to configuration.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/66.0.3359.181 Safari/537.36'
    ),
    'Cookie': (
        'aliyungf_tc=AQAAAG59DhnbIQIAM09FebpTt9sjHVp8; '
        'xq_a_token=7443762eee8f6a162df9eef231aa080d60705b21; '
        'xq_a_token.sig=3dXmfOS3uyMy7b17jgoYQ4gPMMI; '
        'xq_r_token=9ca9ab04037f292f4d5b0683b20266c0133bd863; '
        'xq_r_token.sig=6hcU3ekqyYuzz6nNFrMGDWyt4aU; '
        '_ga=GA1.2.1827567893.1530756945; '
        '_gid=GA1.2.1646579590.1530756945; '
        'u=691530756946503; '
        'device_id=23406a97cd30406028a23eaf5e36f52a; '
        '__utmc=1; '
        '__utmz=1.1530756948.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; '
        'Hm_lvt_1db88642e346389874251b5a1eded6e3=1530756945,1530842665; '
        's=ds11zp8jiu; '
        '__utma=1.1827567893.1530756945.1530756948.1530842680.2; '
        '__utmt=1; '
        '__utmb=1.2.10.1530842680; '
        'Hm_lpvt_1db88642e346389874251b5a1eded6e3=1530842739'
    ),
}


def _fetch_page(page):
    """Download one page of the listing and return the parsed JSON payload.

    Builds the URL from the module-level ``base_url`` template, sends the
    module-level ``headers``, and decodes the response body as UTF-8 JSON.
    The response is closed as soon as the body has been read.
    """
    fullurl = base_url.format(page)
    print('downloading...%s' % fullurl)

    req = request.Request(fullurl, headers=headers)
    with request.urlopen(req) as response:
        return json.loads(response.read().decode('utf-8'))


# Scrape pages 1..46 and store every row twice: once in stock.json and once
# in stock.csv. Both files are opened in a single `with` so they are closed
# even if a request or a JSON parse fails part-way through.
with open('stock.json', 'w', encoding='utf-8') as f1, \
        open('stock.csv', 'w', encoding='utf-8') as f2:
    # csv.writer quotes any field containing a comma, quote or newline, which
    # a plain ','.join() would silently corrupt. lineterminator='\n' keeps
    # the same line endings the previous f2.write(... + '\n') produced.
    csv_writer = csv.writer(f2, lineterminator='\n')

    # NOTE(review): the page count is hard-coded; presumably the listing had
    # 46 pages of 30 rows when this was written -- confirm before reuse.
    for page in range(1, 47):
        data = _fetch_page(page)

        # The CSV header comes from the 'column' list of the first page's
        # payload, so no separate request is needed just for the header.
        if page == 1:
            csv_writer.writerow(data['column'])

        for item in data['data']:
            # JSON storage: one record per line, each followed by ','.
            # NOTE(review): this keeps the existing on-disk format, which is
            # not a valid JSON document as a whole (no enclosing brackets,
            # trailing comma).
            f1.write(json.dumps(item, ensure_ascii=False) + ',\n')
            print(type(item), item)

            # CSV storage: every element is converted to str first, so a
            # None value is still written as the text 'None'.
            csv_writer.writerow([str(value) for value in item])

猜你喜欢

转载自blog.csdn.net/majiexiong/article/details/81949317