1. 分析网页数据
1.1 全部英雄网页:https://lol.qq.com/data/info-heros.shtml
1.2 查找获取所有英雄信息的地址
1.3 获取全部英雄的请求地址和请求方法
1.4 获取全部英雄的请求头部
1.5 获取全部英雄数据预览
1.6 同样的方法获取单个英雄的皮肤列表数据接口【以黑暗之女为例】
2 爬取所有英雄信息
- 设置全局变量保存英雄列表地址、皮肤列表地址、皮肤图片保存文件夹、公用头部;
- 使用 requests 获取【英雄列表地址】返回的所有英雄信息;
def __init__(self):
    """Set up the endpoints, the output folder and the shared request headers."""
    # Folder (relative path) that receives the downloaded skin images.
    self.skinsFolder = 'lol_skins'
    # Address of the file that lists every hero.
    self.heroListUrl = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
    # Prefix of the per-hero detail file; '<heroId>.js' is appended to it.
    self.heroSkinsUrl = 'https://game.gtimg.cn/images/lol/act/img/js/hero/'
    # Headers sent with every data request.
    commonHeaders = {}
    commonHeaders["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"
    commonHeaders["Referer"] = "https://lol.qq.com/"
    self.headers = commonHeaders
def getCurrentUrlData(self, url):
    """Fetch ``url`` and return its body decoded as JSON.

    Args:
        url: Address of a JSON resource.

    Returns:
        The decoded JSON value on success. On any failure (network error,
        HTTP error status, invalid JSON) the error is printed and a failure
        message string is returned; this method does not raise.
    """
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        res = requests.get(url, headers=self.headers, timeout=10)
        # Reject 4xx/5xx responses instead of trying to parse an error page.
        res.raise_for_status()
        return json.loads(res.text)
    except Exception as e:
        # Deliberate best-effort boundary: callers receive the failure
        # message rather than an exception.
        print(e)
        return '获取【{}】数据失败!'.format(url)
def getHeroList(self):
    """Return the list of heroes from the hero-list endpoint.

    Returns:
        The value stored under the ``'hero'`` key of the response, or an
        empty list when the fetch failed or the key is absent.
    """
    data = self.getCurrentUrlData(self.heroListUrl)
    # getCurrentUrlData returns a failure-message string on error;
    # subscripting that with ['hero'] would raise TypeError.
    if not isinstance(data, dict):
        return []
    return data.get('hero', [])
3 获取当前英雄的皮肤信息列表
- 循环英雄列表获取当前英雄的heroId;
- 根据英雄的 heroId ,获取英雄的皮肤列表;
def loopHeroListGetHeroId(self):
    """Walk ``self.heroList`` and download every skin of each hero in turn."""
    for heroInfo in self.heroList:
        # Each hero entry carries the id used to look up its skins.
        currentHeroId = heroInfo.get('heroId')
        heroSkins = self.getHeroSkinsList(currentHeroId)
        self.downloadSkinsList(heroSkins)
def getHeroSkinsList(self, heroId):
    """Return the skin list of the hero identified by ``heroId``.

    Args:
        heroId: Hero identifier; ``'<heroId>.js'`` is appended to
            ``self.heroSkinsUrl`` to build the request address.

    Returns:
        The value stored under the ``'skins'`` key of the response, or an
        empty list when the fetch failed or the key is absent.
    """
    heroUrl = '{}{}{}'.format(self.heroSkinsUrl, heroId, '.js')
    data = self.getCurrentUrlData(heroUrl)
    # getCurrentUrlData returns a failure-message string on error;
    # subscripting that with ['skins'] would raise TypeError.
    if not isinstance(data, dict):
        return []
    return data.get('skins', [])
4 循环皮肤列表下载皮肤
- 循环皮肤列表,获取当前皮肤信息;
- 下载皮肤,获取 name,mainImg;
- 判断 mainImg 不为空字符串;
- requests 获取 mainImg 图片内容;
- 如果成功,写入 imgPath 文件。
def downloadSkin(self, skinInfo):
    """Download one skin's main image into ``self.skinsFolder``.

    Args:
        skinInfo: Mapping describing a skin; the ``'name'``, ``'skinId'``
            and ``'mainImg'`` keys are read.

    Returns:
        None. Progress and failures are reported with ``print``; request
        and file errors are caught rather than raised.
    """
    # Resolve the names before the try block so the error handler can
    # always refer to them; fall back to the skin id when no name is given.
    rawName = skinInfo.get('name') or skinInfo.get('skinId') or 'unknown'
    # '/' would be treated as a path separator in the file name.
    displayName = str(rawName).replace('/', '.')
    skinName = '{}{}'.format(displayName, '.jpg')
    # The address could also be built from skinId
    # (.../img/skin/big{skinId}.jpg); the mainImg field is used directly.
    mainImg = skinInfo.get('mainImg')
    if not mainImg:
        # Entries without an image address (empty or missing) are skipped.
        return
    try:
        # A timeout keeps one stalled download from hanging the whole run.
        response = requests.get(mainImg, timeout=10)
        if response.status_code == 200:
            imgPath = os.path.join(self.skinsFolder, skinName)
            with open(imgPath, 'wb') as img:
                img.write(response.content)
            # Report success only after the bytes have been written.
            print('【{}】图片下载成功!'.format(displayName))
        else:
            print('【{}】图片下载失败!'.format(displayName))
    except Exception as e:
        print(e)
        print('{} 下载失败'.format(skinName))
        print('{} 下载失败图片地址'.format(mainImg))
def downloadSkinsList(self, skinsList):
    """Download every entry of ``skinsList``, one after another."""
    for currentSkin in skinsList:
        # Each entry is handed to downloadSkin unchanged.
        self.downloadSkin(currentSkin)
5 完整代码
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
# https://lol.qq.com/data/info-heros.shtml
@Author :Rattenking
@Date :2021/02/22 16:19
@CSDN :https://blog.csdn.net/m0_38082783
"""
import os
import json
import time
import requests
class DownloadLOLSkin():
    """Batch downloader for League of Legends skin images.

    Fetches the hero list, then each hero's skin list, and saves every
    skin's ``mainImg`` picture into ``skinsFolder``.
    """

    # Seconds to wait on a single HTTP request before giving up.
    requestTimeout = 10

    def __init__(self):
        """Set up the endpoints, the output folder and the shared headers."""
        # Address of the file that lists every hero.
        self.heroListUrl = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
        # Prefix of the per-hero detail file; '<heroId>.js' is appended to it.
        self.heroSkinsUrl = 'https://game.gtimg.cn/images/lol/act/img/js/hero/'
        # Folder (relative path) that receives the downloaded images.
        self.skinsFolder = 'lol_skins'
        # Headers sent with every data request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36",
            "Referer": "https://lol.qq.com/"
        }

    def getCurrentUrlData(self, url):
        """Fetch ``url`` and return its body decoded as JSON.

        Returns:
            The decoded JSON value on success. On any failure (network
            error, HTTP error status, invalid JSON) the error is printed
            and a failure message string is returned; nothing is raised.
        """
        try:
            # A timeout keeps one stalled connection from hanging the run.
            res = requests.get(url, headers=self.headers, timeout=self.requestTimeout)
            # Reject 4xx/5xx responses instead of parsing an error page.
            res.raise_for_status()
            return json.loads(res.text)
        except Exception as e:
            # Deliberate best-effort boundary: callers receive the failure
            # message rather than an exception.
            print(e)
            return '获取【{}】数据失败!'.format(url)

    def getHeroList(self):
        """Return the hero list, or an empty list when it cannot be fetched."""
        data = self.getCurrentUrlData(self.heroListUrl)
        # A failed fetch yields a message string, which cannot be subscripted.
        if not isinstance(data, dict):
            return []
        return data.get('hero', [])

    def getHeroSkinsList(self, heroId):
        """Return the skin list of hero ``heroId``, or an empty list on failure."""
        heroUrl = '{}{}{}'.format(self.heroSkinsUrl, heroId, '.js')
        data = self.getCurrentUrlData(heroUrl)
        # A failed fetch yields a message string, which cannot be subscripted.
        if not isinstance(data, dict):
            return []
        return data.get('skins', [])

    def downloadSkin(self, skinInfo):
        """Download one skin's main image into ``self.skinsFolder``.

        Args:
            skinInfo: Mapping describing a skin; the ``'name'``,
                ``'skinId'`` and ``'mainImg'`` keys are read.

        Returns:
            None. Progress and failures are reported with ``print``;
            request and file errors are caught rather than raised.
        """
        # Resolve the names before the try block so the error handler can
        # always refer to them; fall back to the skin id when no name is given.
        rawName = skinInfo.get('name') or skinInfo.get('skinId') or 'unknown'
        # '/' would be treated as a path separator in the file name.
        displayName = str(rawName).replace('/', '.')
        skinName = '{}{}'.format(displayName, '.jpg')
        # The address could also be built from skinId
        # (.../img/skin/big{skinId}.jpg); the mainImg field is used directly.
        mainImg = skinInfo.get('mainImg')
        if not mainImg:
            # Entries without an image address (empty or missing) are skipped.
            return
        try:
            response = requests.get(mainImg, timeout=self.requestTimeout)
            if response.status_code == 200:
                imgPath = os.path.join(self.skinsFolder, skinName)
                with open(imgPath, 'wb') as img:
                    img.write(response.content)
                # Report success only after the bytes have been written.
                print('【{}】图片下载成功!'.format(displayName))
            else:
                print('【{}】图片下载失败!'.format(displayName))
        except Exception as e:
            print(e)
            print('{} 下载失败'.format(skinName))
            print('{} 下载失败图片地址'.format(mainImg))

    def downloadSkinsList(self, skinsList):
        """Download every entry of ``skinsList``, one after another."""
        for skin in skinsList:
            self.downloadSkin(skin)

    def loopHeroListGetHeroId(self):
        """Walk ``self.heroList`` and download every skin of each hero."""
        for hero in self.heroList:
            skinsList = self.getHeroSkinsList(hero.get('heroId'))
            self.downloadSkinsList(skinsList)

    def createFolder(self):
        """Create the folder that stores the skins if it does not exist yet."""
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.skinsFolder, exist_ok=True)

    def run(self):
        """Run the whole job and print the elapsed download time in ms."""
        self.createFolder()
        self.heroList = self.getHeroList()
        # Only the download loop is timed, not the hero-list fetch.
        startTime = int(round(time.time() * 1000))
        self.loopHeroListGetHeroId()
        endTime = int(round(time.time() * 1000))
        print('本次批量下载用时:{} ms'.format(endTime - startTime))
if __name__ == '__main__':
    # Entry point: build the downloader and start the batch download.
    downloader = DownloadLOLSkin()
    downloader.run()
6 结果预览
7 总结
- 下载方法中可以采用获取皮肤id进行下载皮肤;
mainImg = 'https://game.gtimg.cn/images/lol/act/img/skin/big{}.jpg'.format(skinId)
- 当 mainImg 为空字符串时,通过皮肤 id 拼接出的下载地址也可能下载失败,而且会增加请求次数,所以本文采用的是直接读取 mainImg 字段来下载皮肤;
- 通过 createFolder 判断保存皮肤的文件夹是否存在,不存在则创建;
- 由于数据较多,建议不要采用递归下载皮肤,直接采用循环。