Python 爬取百度贴吧图片并下载

爬取百度贴吧图片

import requests
from bs4 import BeautifulSoup
import  urllib.request
def getHtml(url, headers=None, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        headers: Optional dict of HTTP request headers handed to
            ``requests.get`` (for example a browser-like ``User-Agent``).
            ``None`` sends the library defaults.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response body decoded to ``str``.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server does not answer within ``timeout`` seconds.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode with the encoding detected from the body itself rather than
    # trusting the (possibly missing) charset in the response headers.
    response.encoding = response.apparent_encoding
    return response.text

def getData(html):
    """Collect the ``src`` attribute of every ``<img>`` tag in a page.

    Args:
        html: HTML source of the page as a string.

    Returns:
        A list with the ``src`` values in document order. ``<img>`` tags that
        carry no ``src`` attribute are skipped instead of raising ``KeyError``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    return [
        tag.attrs['src']
        for tag in soup.find_all('img')
        if 'src' in tag.attrs
    ]

if __name__ == '__main__':
    # Fetch the thread page, extract every image link, then download each
    # absolute link into G:\images\ as 0.jpg, 1.jpg, ...
    url = "https://tieba.baidu.com/p/4803144798"
    html = getHtml(url)
    img = getData(html)
    imgName = 0  # running count of downloaded images; also the file name
    for href in img:
        # Skip relative or otherwise malformed src values.
        if not href.startswith('http'):
            continue
        name = "G:\\images\\" + str(imgName) + ".jpg"  # destination path
        # Context managers close the connection and the file even when the
        # transfer or the write fails part-way.
        with urllib.request.urlopen(href) as conn:
            data = conn.read()
        # The file is created only after the download succeeded, so a failed
        # request does not leave an empty file behind.
        with open(name, 'wb') as f:
            f.write(data)
        imgName += 1
        print('正下下载第%s图片' % imgName)

猜你喜欢

转载自blog.csdn.net/qq_40351478/article/details/88966791