# 爬虫小案例:爬取(妹子图片)

import requests #请求库

from bs4 import BeautifulSoup #网页解析库

def get_girl(url):
    """Download every image listed on one gallery index page.

    Fetches ``url``, locates the ``<ul id="pin">`` element, and for each
    ``<li>`` inside it saves the image referenced by the ``data-original``
    attribute of its ``<img>`` tag to the current working directory as
    ``<item text>.jpg``.

    Args:
        url: Address of the index page to scrape.

    Returns:
        None. Images are written to disk as a side effect.

    Raises:
        requests.RequestException: If a request fails at the network level
            or exceeds the timeout.
    """
    # Forged request headers.
    headers = {
        # User agent.
        'User-Agent': "Mozilla/5.0(Windows NT 6.1;WOW64) AppleWebKit/537.1(KHTML,like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        # URL of the previous page. This matters: many anti-scraping checks
        # validate this field.
        'referer': 'https://www.mzitu.com/',
    }

    # A timeout keeps an unresponsive server from hanging the script forever.
    html = requests.get(url, headers=headers, timeout=10)  # send the request
    gallery = BeautifulSoup(html.text, 'lxml').find('ul', id='pin')  # parse the page
    if gallery is None:
        # The expected list is absent (layout changed or request blocked).
        return

    for item in gallery.find_all('li'):
        img = item.find('img')
        if img is None or not img.get('data-original'):
            # Entry without a lazily-loaded image URL: nothing to download.
            continue
        girl_url = img['data-original']  # image URL

        # The item text becomes the file name, so trim surrounding whitespace
        # and replace characters that are not allowed in file names.
        girl_title = item.get_text().strip()
        for bad_char in '\\/:*?"<>|\r\n\t':
            girl_title = girl_title.replace(bad_char, '_')
        file_name = girl_title + ".jpg"  # build the image file name

        response = requests.get(girl_url, headers=headers, timeout=10)  # download the image
        if not response.ok:
            # Do not save an HTTP error page under a .jpg name.
            continue

        print("正在保存图片文件:" + file_name)
        with open(file_name, "wb") as f:  # write the image to disk
            f.write(response.content)

if __name__ == "__main__":
    # Download the first 20 index pages.
    for page in range(1, 21):
        url = 'https://www.mzitu.com/mm/page/%s' % page  # build the URL of each page
        get_girl(url)  # fetch and save the images on that page

  

  

# 猜你喜欢

# 转载自www.cnblogs.com/yanhonghong/p/11681885.html