# 汽车之家 爬虫 (web scraper script — title kept as a comment so the file parses)

import requests
from bs4 import BeautifulSoup
# HTTP request headers used when fetching pages.
# Fixed: the original keys/values were malformed ('User - Agent',
# 'text / html', leading space in ' 1', spaces inside the UA string),
# so servers would have received garbage header names and values.
dict_s = {
    'Upgrade-Insecure-Requests': '1',
    'Referer': 'https://www.696cf.com/htm/downlist6/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
}

# Dead code kept for reference: an earlier version of the request that sent
# hard-coded session cookies. This module-level triple-quoted string is a
# no-op expression statement, effectively a comment block.
'''# 拿到 = requests.get(url,cookies={'__cfduid':'ded969018e77861273aee19f142c4c2201529330809',
'_ga':'GA1.2.282329388.1529330815',
'_gat_gtag_UA_108266294_3':'1',
'_gid':'GA1.2.664839187.1529330815',
'Hm_lpvt_767e27c6fc5a7b6a90ba665ed5f7559b':'1529338158',
'Hm_lvt_767e27c6fc5a7b6a90ba665ed5f7559b':'1529330811'})
'''

# Scrape each detail page: fetch the HTML, extract the title and download links.
# Fixed: the original loop had NO indented body (everything below it sat at
# column 0), which is an IndentationError — the intended body is re-indented
# into the loop here.
# NOTE(review): range(899, 900) visits a single page; the trailing comment
# suggests the full crawl range is 389..11319 — confirm before widening.
for i in range(899, 900):  # 389,11319
    url = "https://www.868cf.com/htm/down6/%s.htm" % str(i)

    # Fetch the page. dict_s headers are defined above but were not passed
    # in the original call; behavior is kept as-is.
    response = requests.get(url)
    response.encoding = 'utf-8'

    soup = BeautifulSoup(response.text, 'html.parser')
    print(soup)
    # Title is taken from <dd class="film_title"> per the original selector;
    # raises AttributeError if the element is missing on a page.
    tag_name = soup.find('dd', 'film_title').text

    # NOTE(review): find('a').find_all(id='downUL') searches INSIDE the first
    # <a> tag on the page; if the download links live under an element with
    # id="downUL", this probably should be soup.find(id='downUL').find_all('a')
    # — confirm against the live page structure.
    tag_url = soup.find('a').find_all(id='downUL')
    print(tag_name, tag_url)




# x=0
# y=[]
# tag_list = soup.find(id='wasu-pl-list').find_all(name='a')
#
# file = open('lj.txt', 'w', encoding='utf-8')
# for i in tag_list:
# h3 = i.find(name='h3')
# if not h3:
# continue
#
# summary = i.find('p').text

# --- Blog-page residue from the original paste, kept for provenance ---
# 猜你喜欢 ("You may also like")
# Reposted from: www.cnblogs.com/hefang/p/9316634.html
# 今日推荐 ("Today's picks")