Python爬取小米众筹数据

Python爬取小米众筹数据

#爬取小米众筹
import pymongo
import requests
import time

# MongoDB handles shared by the script: a client for the local server,
# the "MI" database, and the "zc_items" collection that scraped
# crowdfunding records are written to.
client = pymongo.MongoClient(host='localhost', port=27017)
MI = client.get_database('MI')
zc_items = MI.get_collection('zc_items')

def spider(the_url, timeout=10):
    """POST the crowdfunding "BuildHome" request to ``the_url`` and return its JSON.

    Before the POST, two GET requests are issued on the same session (the
    crowdfunding list page and a timestamp endpoint). Their responses are
    not inspected; presumably they exist to pick up session cookies or to
    look like a normal browser visit -- confirm before removing them.

    Args:
        the_url: URL of the API endpoint that receives the POST.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout, requests would block indefinitely on a
            stalled connection.

    Returns:
        The decoded JSON body of the POST response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx status for the POST request.
        ValueError: If the POST response body is not valid JSON.
    """
    landing_url = "https://home.mi.com/crowdfundinglist"
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
        'Referer': 'https://home.mi.com/crowdfundinglist?id=78&title=%E4%BC%97%E7%AD%B9&trace=m1z1-6033-29',
        'Host': 'home.mi.com',
        'Origin': 'https://home.mi.com',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # NOTE(review): hard-coded cookie captured from a browser session;
        # it may expire and need refreshing.
        'Cookie': 'youpindistinct_id=164dc64d48c395-01dba0ddc4fe75-d35346d; mijiatrace=m1z1-6033-29; Hm_lvt_3d0145da4163eae34eb5e5c70dc07d97=1532705970; Hm_lpvt_3d0145da4163eae34eb5e5c70dc07d97=1532705970; youpin_sessionid=164dc6557ac-0dc2302fd1eeb3-207f; mjclient=m'
        }
    # Request body is sent verbatim as a JSON-formatted string.
    data = '{"request":{"model":"Homepage","action":"BuildHome","parameters":{"id":"78"}}}'

    # The context manager closes the session's pooled connections even if
    # one of the requests raises.
    with requests.Session() as session:
        # Best-effort warm-up requests: status codes are intentionally not
        # checked, matching the original behaviour.
        session.get(landing_url, headers=header, timeout=timeout)
        session.get("https://tp.hd.mi.com/gettimestamp", timeout=timeout)
        time.sleep(1)

        res = session.post(the_url, headers=header, data=data, timeout=timeout)
        # Fail with a clear HTTP error instead of an opaque JSON decode
        # error when the server answers with an error page.
        res.raise_for_status()
        return res.json()

def format_timestamp(epoch_seconds):
    """Render a Unix timestamp as a local-time ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        epoch_seconds: Seconds since the epoch, as an int or anything
            ``int()`` accepts (e.g. a numeric string).

    Returns:
        The formatted timestamp, e.g. ``2016-05-05 20:28:54``.
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(epoch_seconds)))


def extract_items(payload):
    """Return the item list found at ``payload['result']['request']['data']``.

    Any missing or non-dict level yields an empty list instead of raising,
    so an unexpected response does not crash with an opaque AttributeError.
    """
    node = payload
    for key in ('result', 'request', 'data'):
        if not isinstance(node, dict):
            return []
        node = node.get(key)
    return node or []


def build_record(item):
    """Convert one raw API item into the document stored in MongoDB.

    Args:
        item: Mapping with the keys ``name``, ``summary``, ``url``,
            ``saled``, ``saled_count``, ``saled_fee``, ``price_min``,
            ``status``, ``progress``, ``start``, ``end``, ``ctime`` and
            ``utime``.

    Returns:
        A new dict with the amounts divided by 100, the status mapped to a
        display label, and the four timestamps formatted as local time.

    Raises:
        KeyError: If ``item`` lacks one of the expected keys.
    """
    return {
        'name': item['name'],
        'summary': item['summary'],
        'url': item['url'],
        'saled': item['saled'],
        'saled_count': item['saled_count'],
        # Amounts are divided by 100 (presumably fen -> yuan; confirm
        # against the API).
        'saled_fee': int(item['saled_fee']) / 100,
        'price': int(item['price_min']) / 100,
        # A truthy status is labelled "succeeded", otherwise "in progress".
        'status': "已成功" if item['status'] else "众筹中",
        'progress': item['progress'],
        'start': format_timestamp(item['start']),
        'end': format_timestamp(item['end']),
        'ctime': format_timestamp(item['ctime']),
        'utime': format_timestamp(item['utime']),
    }


def main():
    """Fetch the crowdfunding list, print each item and store it in MongoDB."""
    api_url = "https://home.mi.com/app/shopv3/pipe"
    items = extract_items(spider(api_url))
    if not items:
        print("No crowdfunding items found in response")
        return

    for item in items:
        record = build_record(item)
        # Print the fields in record order, showing progress as a percentage.
        shown = dict(record)
        shown['progress'] = '{:.0%}'.format(record['progress'] / 100)
        print(*shown.values())
        zc_items.insert_one(record)


if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/qq_38316655/article/details/81265937