Crawling pictures with Python regular expressions

# coding: utf8
import os
import re
import time
from urllib.parse import urlsplit

import requests

# Topic page whose thumbnails are downloaded when the script is run directly.
PAGE_URL = 'http://www.win4000.com/zt/qsmy.html'

# The page lazy-loads its thumbnails: ``src`` holds a placeholder and the real
# address sits in ``data-original``, which is the group captured here. Example:
# <img src="http://static.win4000.com/home/images/placeholder.jpg" data-original = "http://pic1.win4000.com/wallpaper/5/53bcec5b3235b_270_185.jpg" />
IMG_PATTERN = re.compile(r'<img src=".*?" data-original = "(.*?)" />')


def extract_image_urls(html):
    """Return every ``data-original`` image URL found in *html*, in page order."""
    return IMG_PATTERN.findall(html)


def filename_from_url(img_url):
    """Return the last path segment of *img_url*, ignoring query and fragment.

    An empty string is returned when the URL carries no file name (for
    example an empty URL or one ending in ``/``).
    """
    return urlsplit(img_url).path.rsplit('/', 1)[-1]


def download_images(page_url, out_dir='./tupian', delay=1, timeout=10):
    """Download every thumbnail referenced by *page_url* into *out_dir*.

    Args:
        page_url: Address of the HTML page to scan for images.
        out_dir: Directory the images are written to; created if missing.
        delay: Seconds to pause after each image request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
            Failures on individual images are reported and skipped.
    """
    page = requests.get(page_url, timeout=timeout)
    page.raise_for_status()

    image_urls = extract_image_urls(page.text)
    print(image_urls)

    # open() does not create parent directories, so make sure the target exists.
    os.makedirs(out_dir, exist_ok=True)

    for img_url in image_urls:
        print(img_url)
        file_name = filename_from_url(img_url)
        if not file_name:
            # Nothing sensible to name the file; skip rather than crash.
            continue
        try:
            image = requests.get(img_url, timeout=timeout)
            # Avoid saving an HTML error page under an image file name.
            image.raise_for_status()
        except requests.RequestException as err:
            print('图片%s下载失败: %s' % (file_name, err))
        else:
            with open(os.path.join(out_dir, file_name), mode='wb') as fp:
                fp.write(image.content)
            print('图片%s保存成功!' % file_name)
        time.sleep(delay)


if __name__ == "__main__":
    download_images(PAGE_URL)

Guess you like

Origin www.cnblogs.com/llflifei/p/11915758.html