Python3 使用 urllib.request 模块爬取网页中的图片

from urllib import request
import re
import os

def down_html(url, fname):
    """Download the resource at *url* and save its raw bytes to *fname*.

    Despite the name, this is used for both the HTML page and the images:
    the body is copied verbatim in binary mode.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        fname: Path of the local file to (over)write.

    Raises:
        OSError: If the URL cannot be opened (``URLError`` is an ``OSError``
            subclass) or the destination file cannot be written.
    """
    chunk_size = 64 * 1024
    # The response is opened first so that a failed request does not leave
    # an empty destination file behind; the ``with`` closes the connection
    # even if writing fails.
    with request.urlopen(url) as resp, open(fname, 'wb') as fobj:
        while True:
            # Read in bounded chunks so large files are not held in memory.
            data = resp.read(chunk_size)
            if not data:
                break
            fobj.write(data)

def get_url(fname, patt):
    """Return every substring of the text file *fname* that matches *patt*.

    The file is scanned line by line, so a match cannot span a line break.
    All matches on a line are collected (not just the first one), in the
    order they appear; duplicates are kept.

    Args:
        fname: Path of the text file to scan.
        patt: Regular expression (string or compiled pattern).

    Returns:
        A list of the full matched strings (``match.group()``), regardless
        of any capture groups inside *patt*.
    """
    cpatt = re.compile(patt)
    urls = []
    # Undecodable bytes are skipped instead of raising: the page may use a
    # charset different from the platform default.
    with open(fname, errors='ignore') as fobj:
        for line in fobj:
            urls.extend(m.group() for m in cpatt.finditer(line))
    return urls

if __name__ == '__main__':
    # Script entry point: download the page, extract image URLs from it,
    # then save each image under save_dir.
    save_dir = '/tmp/images'
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(save_dir, exist_ok=True)
    # Raw string so that \w and \. reach the regex engine without
    # invalid-escape warnings; "https?" also picks up HTTPS image links.
    patt = r'https?://[.\w/-]+\.(jpg|jpeg|png|gif)'
    html_url = 'http://www.tedu.cn/'
    html_fname = '/tmp/tedu.html'
    down_html(html_url, html_fname)
    urls = get_url(html_fname, patt)
    # dict.fromkeys drops duplicate URLs while keeping first-seen order,
    # so the same image is not fetched more than once.
    for url in dict.fromkeys(urls):
        # The image is stored under the last path component of its URL.
        url_fname = url.split('/')[-1]
        image_fname = os.path.join(save_dir, url_fname)
        try:
            down_html(url, image_fname)
        except OSError as err:
            # URLError/HTTPError derive from OSError; one broken link
            # should not abort the remaining downloads.
            print('failed to download %s: %s' % (url, err))

发布了73 篇原创文章 · 获赞 4 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/qq_27592485/article/details/102558166