python爬虫实例之百度图片的批量下载


#objURL
from urllib.request import *
# The star import above brings urlopen and urlretrieve (used below) into scope.
import re
#url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fr=&hs=0&xthttps=111111&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=TFBOYS&oq=TFBOYS&rsp=-1'
#url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1504857959439_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ctd=1504857959439%5E00_1254X613&word=%E7%BE%8E%E5%A5%B3'
# Baidu image-search results page to scrape (the search keyword is
# percent-encoded in the query string).
url = "https://image.baidu.com/search/index?ct=201326592&cl=2&st=-1&lm=-1&nc=1&ie=utf-8&tn=baiduimage&ipn=r&rps=1&pv=&fm=rs5&word=%E4%BC%98%E7%BE%8E%E5%A5%B3&oriquery=%E4%B8%AD%E5%9B%BD%E7%9A%84%E7%BE%8E%E5%A5%B3&ofr=%E4%B8%AD%E5%9B%BD%E7%9A%84%E7%BE%8E%E5%A5%B3&sensitive=0"
# Fetch the page. The context manager closes the HTTP response even when
# read() or decode() raises, so the connection is never leaked.
with urlopen(url) as html:
    # decode() uses its default codec (UTF-8); presumably matches the
    # "ie=utf-8" query parameter -- confirm if decoding errors appear.
    obj = html.read().decode()
# obj now holds the page's HTML source as text.

# Pull every original-image link ("objURL" field) out of the page source.
urls = re.findall(r'"objURL":"(.*?)"', obj)

# Download the images one by one, stopping after ten successful downloads.
index = 1  # 1-based number of the next image; also used as the file name stem
mylist = ['.jpg', '.gif', '.png']  # extensions recognised in a link, in priority order

# Compile one pattern per extension, once, outside the loop.  The extension is
# escaped so that "." is a literal dot (not "any character"), and it must not be
# followed by a letter or digit, so ".jpg?w=1" or ".png!small" match while
# "xjpg" or ".jpgx" do not.  Matching ignores case (".JPG" is saved as ".jpg").
_ext_patterns = [
    (hz, re.compile(re.escape(hz) + r'(?![A-Za-z0-9])', re.IGNORECASE))
    for hz in mylist
]

# A dedicated loop variable keeps the page address stored in `url` intact.
for img_url in urls:
    if index > 10:
        break

    # First recognised extension wins; links without one are saved as ".jpg".
    suffix = next(
        (hz for hz, pattern in _ext_patterns if pattern.search(img_url)),
        '.jpg',
    )

    print('正在下载第%d张' % index)
    try:
        # urlretrieve(url, filename) saves the resource to `filename` (here a
        # path relative to the current working directory) and returns a
        # (filename, headers) tuple, which is not needed here.
        urlretrieve(img_url, str(index) + suffix)
    except Exception:
        # Best effort: report the failure and try the next link.  The counter
        # is left unchanged so the next image reuses this number.
        print("下载失败%d张" % index)
    else:
        index += 1

# Announce completion whenever the limit was reached, including the case
# where the page offered exactly ten usable links.
if index > 10:
    print('十张图片下载结束!')

猜你喜欢

转载自blog.csdn.net/Luenci379/article/details/81583192