Scraping Meituan listing data

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests

# ReUtil is a project-local helper (lib/re_util.py, not shown in this post)
# that builds a regex from begin/end markers.
from lib.re_util import ReUtil

# Meituan food ("meishi") listing page to scrape.
base_url = 'http://ns.meituan.com/meishi/b25710/'

# Cookie header copied from a logged-in browser session.
cookies_str = '_lx_utm=utm_source%3Dgoogle%26utm_medium%3Dorganic; _lxsdk_cuid=169416ad3eec8-047ac50146444b-24414032-1fa400-169416ad3eec8; __mta=247371067.1551580718640.1551580718640.1551580718640.1; ci=547; rvct=547%2C20%2C406; _lxsdk=169416ad3eec8-047ac50146444b-24414032-1fa400-169416ad3eec8; mtcdn=K; client-id=0c7a0c6e-a2ae-4078-935c-09341ba89f12; lat=22.884164; lng=113.458377; userTicket=khcRUTMcyLDWOzCyHezncrrepAbXpqpxDHzQUdLl; u=274668244; n=BpY342261584; lsu=; token2=ge9zM0PLN0SCKhx2Pwe7MZzBwRoAAAAA_gcAALQDil5w1rLuttboIENK9gmMKE2ZJDAp8lyGAqOJ3Mu6opaoFQHf6u065s2tZaonxg; lt=ge9zM0PLN0SCKhx2Pwe7MZzBwRoAAAAA_gcAALQDil5w1rLuttboIENK9gmMKE2ZJDAp8lyGAqOJ3Mu6opaoFQHf6u065s2tZaonxg; uuid=e6b22bfb73b9426ba0c6.1551580712.2.0.0; unc=BpY342261584; _lxsdk_s=169483cec48-b6a-7bc-1de%7C%7C1'

# Turn the raw Cookie header string into the dict form that requests expects.
cookies_dict = {}
for cookie in cookies_str.split(";"):
    k, v = cookie.split("=", 1)
    cookies_dict[k.strip()] = v.strip()

headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/71.0.3578.98 Chrome/71.0.3578.98 Safari/537.36'
}

# Fetch the listing page with the browser cookies and User-Agent so the request
# looks like a normal browser visit.
page = requests.get(
    url=base_url,
    cookies=cookies_dict,
    headers=headers
)

def get_element_from_html(raw_html):
    # ReUtil.get_regex builds a regex that captures the text between the
    # begin/end markers -- here the "poiInfos" data that the Meituan page
    # embeds before the "comHeader" key.
    regex = ReUtil.get_regex(begin_with=['"poiInfos":'], end_with=['},"comHeader"'])
    result = regex.findall(raw_html)
    if not result:
        # No match: the page layout changed or the request was blocked.
        return None
    # Join the match's capture groups to recover the raw extracted text.
    ans = "".join(result[0])
    print(ans)
    return ans

get_element_from_html(page.text)
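
ReUtil is project-local and its source isn't shown here, so as a rough alternative sketch the same extraction can be done with only the standard library: find the "poiInfos": key and let json.JSONDecoder.raw_decode parse the value that follows it. The extract_poi_infos helper below is hypothetical (not from the original post) and assumes the listing data is embedded as JSON right after "poiInfos":, as the begin/end markers in the original regex suggest.

import json

def extract_poi_infos(raw_html):
    """Sketch: decode the JSON value that follows "poiInfos": in the page source."""
    marker = '"poiInfos":'
    start = raw_html.find(marker)
    if start == -1:
        return None  # key not present: layout changed or the request was blocked
    idx = start + len(marker)
    # raw_decode does not skip leading whitespace on its own
    while idx < len(raw_html) and raw_html[idx].isspace():
        idx += 1
    try:
        value, _ = json.JSONDecoder().raw_decode(raw_html, idx)
    except json.JSONDecodeError:
        return None  # embedded data was not valid JSON at this position
    return value  # typically a list of shop dicts on Meituan listing pages

# Hypothetical usage; field names such as "title" should be verified against
# the actual response before relying on them:
# for poi in extract_poi_infos(page.text) or []:
#     print(poi.get("title"))

raw_decode parses one JSON value starting at the given index and ignores whatever follows, so an explicit end marker like },"comHeader" is no longer needed.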


Reposted from www.cnblogs.com/liuweimingcprogram/p/10472391.html