Scraping Meituan data with Python

# coding=utf-8
import csv
import time
import requests
import json


# Crawl the shop id, ctPoi and cateName for one area; the argument is the area id
def crow_id(areaid):
    id_list = []
    url = 'https://meishi.meituan.com/i/api/channel/deal/list'
    head = {'Host': 'meishi.meituan.com',
            'Accept': 'application/json',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Referer': 'https://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Mobile Safari/537.36',
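            # NOTE: the cookie value is redacted in the original post; paste in your own logged-in Meituan cookie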
            'Cookie': 'XXXXXXXXXXXXXX'
            }
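    # HTTPS proxy handed to requests via proxies=; swap in a working proxy of your own, or drop it to connect directly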
    p = {'https': 'https://27.157.76.75:4275'}
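    # POST payload copied from the site's own XHR request: areaId picks the district, offset/limit page through 15 shops at a time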
    data = {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
            "partner": 126, "riskLevel": 1, "optimusCode": 10,
            "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
            "offset": 0, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid, "sort": "default",
            "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "", "poi_attr_20043": "", "poi_attr_20033": ""}
    r = requests.post(url, headers=head, data=data, proxies=p)
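    # The endpoint answers with JSON; data.poiList holds both the total shop count and the current page of shops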
    result = json.loads(r.text)
    totalcount = result['data']['poiList']['totalCount']  # total shop count for this area, used to work out how many pages to fetch
    datas = result['data']['poiList']['poiInfos']
    print(len(datas), totalcount)
    for d in datas:
        d_list = ['', '', '', '']
        d_list[0] = d['name']
        d_list[1] = d['cateName']
        d_list[2] = d['poiid']
        d_list[3] = d['ctPoi']
        id_list.append(d_list)
    print('Page:1')
    # Save the data to a local CSV file
    with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
        write = csv.writer(f)
        for i in id_list:
            write.writerow(i)

    # Crawl page 2 through the last page
    offset = 0
    if totalcount > 15:
        totalcount -= 15
        while offset < totalcount:
            id_list = []
            offset += 15
            m = offset // 15 + 1  # page number currently being fetched
            print('Page:%d' % m)
            # Build the POST payload again; paging works by bumping offset in steps of 15
            data2 = {"uuid": "09dbb48e-4aed-4683-9ce5-c14b16ae7539", "version": "8.3.3", "platform": 3, "app": "",
                     "partner": 126, "riskLevel": 1, "optimusCode": 10,
                     "originUrl": "http://meishi.meituan.com/i/?ci=30&stid_b=1&cevent=imt%2Fhomepage%2Fcategory1%2F1",
                     "offset": offset, "limit": 15, "cateId": 1, "lineId": 0, "stationId": 0, "areaId": areaid,
                     "sort": "default",
                     "deal_attr_23": "", "deal_attr_24": "", "deal_attr_25": "", "poi_attr_20043": "",
                     "poi_attr_20033": ""}
            try:
                r = requests.post(url, headers=head, data=data2, proxies=p)
                print(r.text)
                result = json.loads(r.text)
                datas = result['data']['poiList']['poiInfos']
                print(len(datas))
                for d in datas:
                    d_list = ['', '', '', '']
                    d_list[0] = d['name']
                    d_list[1] = d['cateName']
                    d_list[2] = d['poiid']
                    d_list[3] = d['ctPoi']
                    id_list.append(d_list)
                # Append this page to the local CSV as well
                with open('meituan_id.csv', 'a', newline='', encoding='gb18030') as f:
                    write = csv.writer(f)
                    for i in id_list:
                        write.writerow(i)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    # Area info copied straight from the page's HTML; 南澳新区 (Nan'ao New District) needs extra handling because it has no sub-areas
    a = {"areaObj": {"28": [{"id": 28, "name": "全部", "regionName": "福田区", "count": 4022},
                            {"id": 139, "name": "历下区", "regionName": "历下区", "count": 3307},
                            {"id": 744, "name": "梅林", "regionName": "梅林", "count": 421},
                            {"id": 7996, "name": "福田保税区", "regionName": "福田保税区", "count": 29}],
                     }}

    datas = a['areaObj']
    b = datas.values()
    area_list = []
    for data in b:
        for d in data[1:]:  # skip the first entry ("全部"/All), which aggregates the whole region
            area_list.append(d)  # collect every sub-area into the list; each element is a dict
    l = 0
    old = time.time()
    for i in area_list:
        l += 1
        print('Crawling area %d:' % l, i['regionName'], 'total shops:', i['count'])
        try:
            crow_id(i['id'])
            now = time.time() - old
            print(i['name'], 'crawl finished!', 'elapsed: %ds' % now)
        except Exception as e:
            print(e)
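
After a run finishes, the output can be sanity-checked by reading the CSV back. The snippet below is a minimal sketch, assuming the meituan_id.csv file written above (four columns per row: name, cateName, poiid, ctPoi, encoded as gb18030):

# coding=utf-8
import csv

# Load every row the crawler appended to meituan_id.csv
with open('meituan_id.csv', newline='', encoding='gb18030') as f:
    rows = list(csv.reader(f))

print('shops saved:', len(rows))
# Print the first few shops to confirm the columns line up
for name, cate_name, poiid, ct_poi in rows[:5]:
    print(name, cate_name, poiid, ct_poi)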

Reposted from blog.csdn.net/jidawanghao/article/details/108336975