Python爬取数据：header设置cookie

# encoding: utf-8

import urllib.request
from bs4 import BeautifulSoup
import os
import time
import pymysql
import xlwt
import requests
import json

def getDatas(total_pages=410, page_size=20, timeout=30):
    """Fetch the paginated news list and collect the table rows of every page.

    Each page is requested with a form-encoded POST to the site's AJAX
    endpoint. The JSON reply carries an HTML fragment under the
    ``datalist`` key; that fragment is parsed and its ``<tr>`` elements
    are accumulated.

    Args:
        total_pages: Number of pages to request, starting at page 1.
        page_size: Value sent as ``pagesize`` in every request.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the whole crawl.

    Returns:
        A list with the ``<tr>`` tags found on all requested pages, in
        page order.

    Raises:
        requests.HTTPError: If a page answers with a 4xx/5xx status.
        ValueError: If a response body is not valid JSON.
        KeyError: If a JSON response has no ``datalist`` entry.
    """
    url = 'https://www.uchuanbo.com/member/ajax/news_list.php'
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        # 'br' is deliberately not advertised: requests only decodes Brotli
        # when an optional brotli package is installed, and an undecoded
        # body would break the JSON parsing below.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7',
        'Connection': 'keep-alive',
        # No hard-coded Content-Length: requests derives it from the encoded
        # form body, whose size changes with the page number.
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # Session cookie copied from a logged-in browser.
        # NOTE(review): presumably expires -- replace with a fresh value
        # before running.
        'Cookie': 'PHPSESSID=mcu4jn5gdirofjnakfrfn4tj62; OUTFOX_SEARCH_USER_ID_NCOO=479025887.6639701; Hm_lvt_321dd239bfd0ffe0ed3107c3da888f47=1534945762; ec_im_local_status=0; CUSTOM_INVITE_CONTENT=; ec_invite_state=0; LXB_REFER=www.google.com; ec_invite_state_time=1534945783551; ec_im_tab_num=0; kefutype=0; Hm_lpvt_321dd239bfd0ffe0ed3107c3da888f47=1534945797',
        'DNT': '1',
        'Host': 'www.uchuanbo.com',
        'Origin': 'https://www.uchuanbo.com',
        'Referer': 'https://www.uchuanbo.com/member/news.php',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    rows = []
    # One session for the whole crawl so the connection is reused
    # instead of being re-established for every page.
    with requests.Session() as session:
        session.headers.update(headers)
        for page in range(1, total_pages + 1):
            payload = {
                'pagesize': page_size,
                'pagenumber': page,
                'keywords': '',
                'category': '0',
                'portal': 0,
                'area': 0,
                'prange': '0,100000000',
                'record': 0,
                'cansend': 0,
                'linktype': 0,
                'media_u_type': 0,
                'orderby': 'listorder DESC',
            }

            response = session.post(url, data=payload, timeout=timeout)
            # Fail loudly on an error page instead of trying to parse it.
            response.raise_for_status()

            # 'datalist' holds an HTML fragment, not structured data.
            html_fragment = json.loads(response.text)['datalist']
            soup = BeautifulSoup(html_fragment, features="html.parser")
            rows.extend(soup.find_all('tr'))

    return rows

# Script entry point: start the crawl. This call sits at module top level
# with no __main__ guard, so it also runs whenever the file is imported.
getDatas()

猜你喜欢

转载自blog.csdn.net/zhanghl150426/article/details/82021391
今日推荐