第一个简单的图片爬虫

#!/usr/bin/python3
# encoding: UTF-8

import re
import string
import urllib.request
from urllib.parse import quote

def get_html(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download. It may contain non-ASCII
            characters (for example Chinese path segments).

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for an unreachable or
        malformed address (e.g. ``urllib.error.URLError``, ``ValueError``).
    """
    # safe=string.printable leaves every printable ASCII character alone,
    # so only non-ASCII characters end up percent-encoded.
    encoded_url = quote(url, safe=string.printable)

    # The context manager closes the underlying connection even when
    # read() raises; the original left the response open.
    with urllib.request.urlopen(encoded_url) as page:
        return page.read()

# Captures the value of any src="..." attribute that ends in ".jpg".
# Raw string: "\S" and "\." are regex escapes, not Python string escapes.
reg = r'src="(\S*?\.jpg)"'
reg_resource = re.compile(reg)


def find_image_urls(html, base_url):
    """Return the absolute URLs of all ``.jpg`` images referenced in *html*.

    Args:
        html: Page content, either ``bytes`` as returned by ``get_html`` or
            an already decoded ``str``.
        base_url: Address the page was fetched from; relative and
            protocol-relative ``src`` values are resolved against it.

    Returns:
        A list of absolute image URLs in document order (duplicates kept).
    """
    if isinstance(html, bytes):
        # Decode instead of calling str() on the bytes: str() yields the
        # "b'...'" repr, in which non-ASCII bytes and newlines turn into
        # backslash escapes that corrupt the matched URLs.
        html = html.decode('utf-8', errors='replace')
    return [
        urllib.parse.urljoin(base_url, src)
        for src in reg_resource.findall(html)
    ]


def download_images(image_urls):
    """Download every URL in *image_urls* into the current directory.

    Files are named ``0.jpg``, ``1.jpg``, ... in iteration order.

    Args:
        image_urls: Iterable of absolute image URLs.

    Returns:
        The number of images downloaded.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises on a failed download;
        the first failure aborts the run.
    """
    count = 0
    for count, image_url in enumerate(image_urls, start=1):
        # Percent-encode non-ASCII characters the same way get_html does
        # for the page address.
        encoded = quote(image_url, safe=string.printable)
        urllib.request.urlretrieve(encoded, '%s.jpg' % (count - 1))
    return count


def main():
    """Prompt for a page address, download its ``.jpg`` images, print the count."""
    url = input('请输入:')
    image_urls = find_image_urls(get_html(url), url)
    print(download_images(image_urls))


if __name__ == '__main__':
    main()

猜你喜欢

转载自www.cnblogs.com/kaisi/p/9089478.html
今日推荐