from bs4 import BeautifulSoup as bs
import re
import requests
# Download the pictures of a few consecutive www.mmjpg.com albums.
#
# For each album, every numbered page is fetched, the first <img> tag on the
# page is taken as the picture, and the picture is saved in the current
# working directory as "<counter>.jpg".

# Album numbering: the albums FIRST_ALBUM_ID + 1 .. FIRST_ALBUM_ID + ALBUM_COUNT
# are visited (1363, 1364, 1365 with the values below).
FIRST_ALBUM_ID = 1362
ALBUM_COUNT = 3
# Pages 1..PAGES_PER_ALBUM are requested for every album.
PAGES_PER_ALBUM = 30
# Seconds to wait for a server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

ALBUM_URL_PREFIX = 'http://www.mmjpg.com/mm/'

# Matches <img> tags whose src contains ".jpg".
JPG_SRC_PATTERN = re.compile(r'.*?\.jpg')

USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36')

ACCEPT = ('text/html,application/xhtml+xml,application/xml;q=0.9,'
          'image/webp,image/apng,*/*;q=0.8')

# Request headers for the album pages (copied from the browser's dev tools).
PAGE_HEADERS = {
    'Accept': ACCEPT,
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Host': 'www.mmjpg.com',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': USER_AGENT,
}

# Request headers for the picture itself.
# - No 'Host': it is derived from the picture URL, which is scraped from the
#   page and is not guaranteed to live on one fixed host.
# - No 'If-Modified-Since' / 'If-None-Match': conditional requests may be
#   answered with "304 Not Modified" and an empty body, which would be saved
#   as an empty file.
# - 'Referer' is added per album in main().
IMAGE_HEADERS = {
    'Accept': ACCEPT,
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': USER_AGENT,
}


def _find_image_url(html):
    """Return the ``src`` of the first ``<img>`` tag in *html*.

    Every ``<img>`` whose ``src`` matches ``JPG_SRC_PATTERN`` is printed as a
    progress/debug aid.  Returns ``None`` when the page has no ``<img>`` tag
    or the first one has no ``src`` attribute.
    """
    soup = bs(html, features='lxml')
    # Search the whole document; searching inside a single <img> tag can
    # never find anything because <img> has no children.
    for tag in soup.find_all('img', {'src': JPG_SRC_PATTERN}):
        print(tag['src'])
    first_img = soup.img
    if first_img is None:
        return None
    return first_img.get('src')


def main():
    """Fetch every page of every album and save the pictures to disk."""
    name_img = 0  # running counter used as the output file name
    first = FIRST_ALBUM_ID + 1
    for num in range(first, first + ALBUM_COUNT):
        album_url = ALBUM_URL_PREFIX + str(num)
        # The picture request carries the album it belongs to as Referer.
        image_headers = dict(IMAGE_HEADERS, Referer=album_url)
        for num_img in range(1, PAGES_PER_ALBUM + 1):
            # Fetch the album page.
            url = album_url + '/' + str(num_img)
            res = requests.get(url, headers=PAGE_HEADERS,
                               timeout=REQUEST_TIMEOUT)
            if res.status_code != 200:
                # e.g. the album has fewer pages than PAGES_PER_ALBUM
                continue
            # Extract the picture link.
            url_img = _find_image_url(res.text)
            if not url_img:
                continue
            # Fetch the picture.
            img_res = requests.get(url_img, headers=image_headers,
                                   timeout=REQUEST_TIMEOUT)
            if img_res.status_code != 200:
                # Never write an error page or an empty body as a .jpg.
                continue
            # Save the picture.
            name_img += 1
            with open('{}.jpg'.format(name_img), 'wb') as f:
                f.write(img_res.content)
        # NOTE(review): the original indentation was lost; this increment is
        # assumed to run once per album so that each album's files start in a
        # separate number range -- confirm.
        name_img += 100


if __name__ == '__main__':
    main()
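# Python web crawler: scraping images
# Reposted from blog.csdn.net/weixin_42016382/article/details/82460229
# (Blog page navigation residue: "You may also like", "Today's picks", "Weekly ranking")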