Python ---- 爬虫 爬取LOL英雄皮肤图片

1. 分析网页数据

1.1 全部英雄网页:https://lol.qq.com/data/info-heros.shtml
1.2 查找获取所有英雄信息的地址
在这里插入图片描述
1.3 获取全部英雄的请求地址和请求方法
在这里插入图片描述
1.4 获取全部英雄的请求头部
在这里插入图片描述
1.5 获取全部英雄数据预览
在这里插入图片描述
1.6 同样的方法获取单个英雄的皮肤列表数据接口【以黑暗之女为例】
在这里插入图片描述

2 爬取所有英雄信息

  1. 设置全局变量保存英雄列表地址、皮肤列表地址、皮肤图片保存文件夹、公用头部;
  2. 使用 requests 获取【英雄列表地址】返回的所有英雄信息;
def __init__(self):
    """Store the hero-list URL, skin-list base URL, output folder and shared request headers."""
    # Headers sent along with the data requests.
    commonHeaders = {}
    commonHeaders["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"
    commonHeaders["Referer"] = "https://lol.qq.com/"
    self.headers = commonHeaders

    # Folder the skin images are saved into.
    self.skinsFolder = 'lol_skins'

    # Address of the hero list, and the prefix that a hero id plus '.js' is appended to.
    self.heroSkinsUrl = 'https://game.gtimg.cn/images/lol/act/img/js/hero/'
    self.heroListUrl = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    
def getCurrentUrlData(self, url, timeout=10):
    """Fetch ``url`` with ``self.headers`` and return its body parsed as JSON.

    Args:
      url: Address to request.
      timeout: Seconds to wait for the server before giving up.

    Returns:
      The decoded JSON value on success. On any failure (connection error,
      timeout, HTTP error status, invalid JSON) the exception is printed and
      an error message string is returned instead of raising, so callers
      should check the result type before indexing into it.
    """
    try:
      # Without a timeout a single stalled connection would hang the whole crawl.
      res = requests.get(url, headers=self.headers, timeout=timeout)
      # Treat HTTP error statuses as failures rather than parsing an error page.
      res.raise_for_status()
      return json.loads(res.text)
    except Exception as e:
      print(e)
      return '获取【{}】数据失败!'.format(url)

  def getHeroList(self):
    """Return the hero list found under the ``'hero'`` key of the hero-list data.

    ``getCurrentUrlData`` returns an error string instead of raising when the
    request fails; indexing that string would raise ``TypeError``. Anything
    that is not a dict, or has no ``'hero'`` entry, yields an empty list.
    """
    data = self.getCurrentUrlData(self.heroListUrl)
    if not isinstance(data, dict):
      return []
    return data.get('hero') or []

3 获取当前英雄的皮肤信息列表

  1. 循环英雄列表获取当前英雄的heroId;
  2. 根据英雄的 heroId ,获取英雄的皮肤列表;
def loopHeroListGetHeroId(self):
    """Walk ``self.heroList`` and download the skins of each hero in turn."""
    # Lazy generator: each id is read right before its skins are fetched.
    heroIds = (entry.get('heroId') for entry in self.heroList)
    for currentId in heroIds:
      self.downloadSkinsList(self.getHeroSkinsList(currentId))
      
def getHeroSkinsList(self, heroId):
    """Return the skin list found under the ``'skins'`` key of one hero's data.

    The data address is ``self.heroSkinsUrl`` + ``heroId`` + ``'.js'``.
    ``getCurrentUrlData`` returns an error string instead of raising when the
    request fails, so a non-dict result (or a missing ``'skins'`` entry)
    yields an empty list rather than aborting the crawl for every other hero.
    """
    url = '{}{}{}'.format(self.heroSkinsUrl, heroId, '.js')
    data = self.getCurrentUrlData(url)
    if not isinstance(data, dict):
      return []
    return data.get('skins') or []

4 循环皮肤列表下载皮肤

  1. 循环皮肤列表,获取当前皮肤信息;
  2. 下载皮肤,获取 name,mainImg;
  3. 判断 mainImg 不为空字符串;
  4. requests 获取 mainImg 图片内容;
  5. 如果成功,写入 imgPath 文件。
def downloadSkin(self, skinInfo):
    """Download the image of one skin into ``self.skinsFolder``.

    Args:
      skinInfo: Dict describing a skin; ``name`` and ``mainImg`` are read,
        with ``skinId`` used as a fallback file name when ``name`` is missing.

    The file is named after the skin, with ``/`` replaced by ``.`` so the name
    cannot be taken for a sub-directory. Skins without a ``mainImg`` address
    are skipped. Failures are printed, never raised, so one bad skin does not
    stop the surrounding loop.
    """
    # Resolve everything the error handler prints before entering the try
    # block, so the handler can never hit an unbound local.
    rawName = skinInfo.get('name') or skinInfo.get('skinId') or 'unknown'
    displayName = str(rawName).replace('/', '.')
    skinName = '{}{}'.format(displayName, '.jpg')
    mainImg = skinInfo.get('mainImg')
    # Covers both the empty string and a missing/None value.
    if not mainImg:
      return
    try:
      # A timeout keeps one stalled download from hanging the whole crawl.
      response = requests.get(mainImg, headers=self.headers, timeout=10)
      if response.status_code == 200:
        imgPath = os.path.join(self.skinsFolder, skinName)
        with open(imgPath, 'wb') as img:
          img.write(response.content)
        # Report success only after the bytes are actually on disk.
        print('【{}】图片下载成功!'.format(displayName))
      else:
        print('【{}】图片下载失败!'.format(displayName))
    except Exception as e:
      print(e)
      print('{} 下载失败'.format(skinName))
      print('{} 下载失败图片地址'.format(mainImg))

  def downloadSkinsList(self, skinsList):
    """Hand every entry of ``skinsList`` to ``downloadSkin``, in order."""
    for skinInfo in skinsList:
      self.downloadSkin(skinInfo)

5 完整代码

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
# https://lol.qq.com/data/info-heros.shtml
@Author  :Rattenking
@Date    :2021/02/22 16:19
@CSDN	 :https://blog.csdn.net/m0_38082783
"""
import os
import json
import time
import requests

class DownloadLOLSkin:
  """Crawler that saves League of Legends skin images to a local folder.

  ``run()`` drives the whole job: create the output folder, fetch the hero
  list, fetch each hero's skin list, and download every skin that carries a
  ``mainImg`` address.
  """

  def __init__(self, skinsFolder='lol_skins', timeout=10):
    """Store the data addresses, output folder and request settings.

    Args:
      skinsFolder: Directory the images are written to.
      timeout: Seconds to wait on each HTTP request before giving up.
    """
    self.heroListUrl = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    self.heroSkinsUrl = 'https://game.gtimg.cn/images/lol/act/img/js/hero/'
    self.skinsFolder = skinsFolder
    self.timeout = timeout
    # Filled in by run(); starts empty so loopHeroListGetHeroId() is safe to
    # call before run() instead of raising AttributeError.
    self.heroList = []
    self.headers = {
      "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36",
      "Referer": "https://lol.qq.com/"
    }

  def getCurrentUrlData(self, url):
    """Fetch ``url`` with ``self.headers`` and return its body parsed as JSON.

    On any failure (connection error, timeout, HTTP error status, invalid
    JSON) the exception is printed and an error message string is returned
    instead of raising, so callers must check the result type before
    indexing into it.
    """
    try:
      # Without a timeout a single stalled connection would hang the crawl.
      res = requests.get(url, headers=self.headers, timeout=self.timeout)
      # Treat HTTP error statuses as failures rather than parsing an error page.
      res.raise_for_status()
      return json.loads(res.text)
    except Exception as e:
      print(e)
      return '获取【{}】数据失败!'.format(url)

  def getHeroList(self):
    """Return the hero list, or an empty list when it could not be fetched."""
    data = self.getCurrentUrlData(self.heroListUrl)
    # A failed fetch comes back as an error string; indexing it would raise TypeError.
    if not isinstance(data, dict):
      return []
    return data.get('hero') or []

  def getHeroSkinsList(self, heroId):
    """Return the skin list of hero ``heroId``, or an empty list on failure."""
    data = self.getCurrentUrlData('{}{}{}'.format(self.heroSkinsUrl, heroId, '.js'))
    # Same guard as getHeroList: one unreachable hero must not abort the run.
    if not isinstance(data, dict):
      return []
    return data.get('skins') or []

  def downloadSkin(self, skinInfo):
    """Download the image of one skin into ``self.skinsFolder``.

    Args:
      skinInfo: Dict describing a skin; ``name`` and ``mainImg`` are read,
        with ``skinId`` used as a fallback file name when ``name`` is missing.

    The file is named after the skin, with ``/`` replaced by ``.`` so the
    name cannot be taken for a sub-directory. Skins without a ``mainImg``
    address are skipped. Failures are printed, never raised, so one bad skin
    does not stop the surrounding loop.
    """
    # Resolve everything the error handler prints before entering the try
    # block, so the handler can never hit an unbound local.
    rawName = skinInfo.get('name') or skinInfo.get('skinId') or 'unknown'
    displayName = str(rawName).replace('/', '.')
    skinName = '{}{}'.format(displayName, '.jpg')
    mainImg = skinInfo.get('mainImg')
    # Covers both the empty string and a missing/None value.
    if not mainImg:
      return
    try:
      response = requests.get(mainImg, headers=self.headers, timeout=self.timeout)
      if response.status_code == 200:
        imgPath = os.path.join(self.skinsFolder, skinName)
        with open(imgPath, 'wb') as img:
          img.write(response.content)
        # Report success only after the bytes are actually on disk.
        print('【{}】图片下载成功!'.format(displayName))
      else:
        print('【{}】图片下载失败!'.format(displayName))
    except Exception as e:
      print(e)
      print('{} 下载失败'.format(skinName))
      print('{} 下载失败图片地址'.format(mainImg))

  def downloadSkinsList(self, skinsList):
    """Download every skin in ``skinsList``, in order."""
    for skin in skinsList:
      self.downloadSkin(skin)

  def loopHeroListGetHeroId(self):
    """Fetch and download the skins of every hero in ``self.heroList``."""
    for hero in self.heroList:
      skinsList = self.getHeroSkinsList(hero.get('heroId'))
      self.downloadSkinsList(skinsList)

  def createFolder(self):
    """Create the output folder, including missing parents, if it is absent."""
    # exist_ok avoids the check-then-create race and makes repeat calls harmless.
    os.makedirs(self.skinsFolder, exist_ok=True)

  def run(self):
    """Run the full crawl and print how long the downloads took."""
    self.createFolder()
    self.heroList = self.getHeroList()
    # perf_counter is monotonic, so the measurement survives system clock changes.
    startTime = time.perf_counter()
    self.loopHeroListGetHeroId()
    elapsedMs = int(round((time.perf_counter() - startTime) * 1000))
    print('本次批量下载用时:{} ms'.format(elapsedMs))
    
if __name__ == '__main__':
  # Start the crawl only when this file is executed as a script, not on import.
  lol = DownloadLOLSkin()
  # Run the script
  lol.run()

6 结果预览

在这里插入图片描述

7 总结

  1. 下载方法中可以采用获取皮肤id进行下载皮肤;
    mainImg = 'https://game.gtimg.cn/images/lol/act/img/skin/big{}.jpg'.format(skinId)
    
  2. 当 mainImg 为空字符串时,通过皮肤 id 拼接出的下载地址也可能下载失败,反而增加了无效的请求次数,所以本文直接使用 mainImg 字段下载皮肤;
  3. 保存皮肤文件夹的创建和判断 createFolder;
  4. 由于数据较多,建议不要采用递归下载皮肤,直接采用循环。

猜你喜欢

转载自blog.csdn.net/m0_38082783/article/details/114078546
今日推荐