Python 爬虫之爬取图片


from bs4 import BeautifulSoup as bs
import re
import requests

# Crawl settings. The original counter started at 1362 and was incremented
# before its first use, so the first gallery actually fetched is 1363.
FIRST_GALLERY = 1363
GALLERY_COUNT = 3
PAGES_PER_GALLERY = 30
# Seconds to wait for a response; requests never times out on its own.
REQUEST_TIMEOUT = 10
# Gap left in the output file numbering between two galleries.
GALLERY_NAME_GAP = 100

# Browser-like request headers (copied from the browser's F12 network panel).
# Field names must not contain spaces and the values must be well-formed,
# otherwise servers ignore or reject them.  "Host" is intentionally omitted:
# requests derives it from the URL, which stays correct for any image host.
BASE_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;'
              'q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537'
                  '.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
}

# Matches any <img> whose src contains ".jpg" (BeautifulSoup uses search()).
JPG_SRC = re.compile(r'\.jpg')


def build_page_url(gallery, page):
    """Return the URL of page number *page* inside gallery *gallery*."""
    return 'http://www.mmjpg.com/mm/' + str(gallery) + '/' + str(page)


def image_headers(page_url):
    """Return the headers for an image request issued from *page_url*.

    The Referer is set to the page that embeds the image instead of a
    hard-coded gallery.  No conditional headers (If-Modified-Since /
    If-None-Match) are sent, so the server cannot answer with an empty 304.
    """
    headers = dict(BASE_HEADERS)
    headers['Referer'] = page_url
    return headers


def fetch(url, headers):
    """GET *url* and return the response, or None if the request failed.

    Network errors, timeouts, malformed URLs and HTTP error statuses are
    reported on stdout and turned into None so one bad page does not abort
    the whole crawl.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('skip {}: {}'.format(url, exc))
        return None
    return response


def find_image_url(html):
    """Print every .jpg image link found in *html* and return the first one.

    Returns None when the page contains no matching <img> tag.
    """
    soup = bs(html, features='lxml')
    # Search the whole document; an <img> tag itself has no children to search.
    tags = soup.find_all('img', {'src': JPG_SRC})
    for tag in tags:
        print(tag['src'])
    return tags[0]['src'] if tags else None


def main():
    """Download the picture of every page of the configured galleries.

    Files are written to the current directory as "<n>.jpg"; the counter
    advances by one per saved image and by GALLERY_NAME_GAP after each
    gallery, as in the original script.
    """
    name_img = 0
    for gallery in range(FIRST_GALLERY, FIRST_GALLERY + GALLERY_COUNT):
        for page in range(1, PAGES_PER_GALLERY + 1):
            page_url = build_page_url(gallery, page)

            # Fetch the gallery page.
            page_res = fetch(page_url, BASE_HEADERS)
            if page_res is None:
                continue

            # Extract the image link.
            url_img = find_image_url(page_res.text)
            if url_img is None:
                print('skip {}: no .jpg image found'.format(page_url))
                continue

            # Fetch the image itself.
            img_res = fetch(url_img, image_headers(page_url))
            if img_res is None:
                continue

            # Save the image.
            name_img += 1
            with open('{}.jpg'.format(name_img), 'wb') as f:
                f.write(img_res.content)
        name_img += GALLERY_NAME_GAP


if __name__ == '__main__':
    main()
Python 爬虫之爬取图片

猜你喜欢