[02] Project: a searchable data structure for King of Glory heroes
1 Goal
1. Download the hero profile file herolist.json from https://pvp.qq.com/web201605/js/herolist.json.
2. Assume the hero type identifiers are as follows:
hero_type = [ "All", "Warrior", "Mage", "tank", "Assassin", "shooter", "secondary"]
3. Fetch the hero list page from https://pvp.qq.com/web201605/herolist.shtml; it provides each hero's name and picture link.
4. Extract each hero's avatar name and link information from the web page.
5. Combine the two sources of information and construct a list of hero detail records, e.g.
[ ['Chang E', 'Mage | Warrior | tank', 'Hanyue Princess | flower dew reflection', 'game.gtimg.cn/images/yxzj/img201606/heroimg/515/515.jpg'], ['Shangguan Waner', 'Mage | All | Assassin', 'fleeting pen | Xiuzhu letters', 'game.gtimg.cn/images/yxzj/img201606/heroimg/513/513.jpg'], ]
6. Design a data structure that makes searching easy, then design and implement the search function:
def lookup(index, keyword):
    """Interface stub for the search function named in goal 6.

    Takes the search index and a keyword; it has no behaviour yet and
    always returns ``None``.
    """
    return None
2, code implementation
import json
from bs4 import BeautifulSoup as bs
import requests
0.1 Database
# Fetch the hero list JSON file from the official website.
# (The response must be bound to ``r``; the call result is what the lines
# below read via ``r.text``.)
r = requests.get('https://pvp.qq.com/web201605/js/herolist.json')

# Parsing here raises immediately if the response body is not valid JSON.
hero_list = json.loads(r.text)

# Reset before the save/restore round trip below -- presumably so that the
# copy restored from disk is the one that ends up in hero_list.
hero_list = None

# Save the raw response text.
with open("all_hero.json", 'wt', encoding="utf-8") as fd:
    fd.write(r.text)

# Restore the hero list from the saved file.
with open('all_hero.json', encoding="utf-8") as json_data:
    hero_list = json.load(json_data)
def search_for_hero_info(name=None):
    """Return the first record of ``hero_list`` whose "cname" equals *name*.

    Records without a "cname" key are skipped.  Returns ``None`` when no
    record matches.
    """
    candidates = (
        record
        for record in hero_list
        if "cname" in record and record["cname"] == name
    )
    return next(candidates, None)
# Hero type labels.  build_hero_type uses the "hero_type", "new_type" and
# "hero_type2" values of a hero record as indexes into this list.
hero_type = [ "All", "Warrior", "Mage", "tank", "Assassin", "shooter", "secondary"]
from selenium import webdriver

# Load the hero list page in Chrome and capture the page source as the
# browser sees it.
browser = webdriver.Chrome('./chromedriver')
try:
    browser.get("https://pvp.qq.com/web201605/herolist.shtml")
    html = browser.page_source
finally:
    # Always shut the browser down, even when loading the page fails, so
    # that no browser/driver process is left running.
    browser.quit()

# Save the HTML.
with open("hero_web.html", 'w', encoding="utf-8") as fd:
    fd.write(html)

# Restore the saved HTML.
hero_html = None
with open("hero_web.html", 'r', encoding="utf-8") as fd:
    hero_html = fd.read()
def build_hero_type(hero):
    """Return the hero's type labels joined with '|'.

    The keys "hero_type", "new_type" and "hero_type2" are consulted in that
    order.  Each key present in *hero* is used as an index into the
    module-level ``hero_type`` list; absent keys contribute nothing.
    """
    labels = [
        hero_type[hero[field]]
        for field in ("hero_type", "new_type", "hero_type2")
        if field in hero
    ]
    return '|'.join(labels)
# Each merged record is [hero_name, hero_type, hero_skin, hero_url].
def merge_hero_info(hero_html, hero_json):
    """Combine the heroes scraped from the web page with their JSON details.

    Args:
        hero_html: iterable of ``[name, image_url]`` pairs.
        hero_json: accepted for interface compatibility; it is not read
            here.  The lookup goes through ``search_for_hero_info``, which
            searches the module-level ``hero_list``.

    Returns:
        A list of ``[name, types, skins, image_url]`` records, one per entry
        of *hero_html*, in the same order.  ``types`` and ``skins`` are
        empty strings when no JSON record matches the name; ``skins`` is
        also empty when the record has no "skin_name".
    """
    all_heros = []
    for hero in hero_html:
        name, url = hero[0], hero[1]
        hero_detail = search_for_hero_info(name)
        if hero_detail is None:
            # No JSON record for this name: keep the hero (it is still
            # searchable by name) instead of crashing on None.
            types, skins = "", ""
        else:
            types = build_hero_type(hero_detail)
            # "skin_name" may be absent, like the other optional keys.
            skins = hero_detail.get("skin_name", "").strip(" '")
        all_heros.append([name, types, skins, url])
    return all_heros
# Parse the captured page and collect the <li> entries of the hero list.
hero_soup = bs(html, 'lxml')
hero_html_list = hero_soup.find("ul", class_="herolist")
all_hero_list = hero_html_list.find_all("li")

# One [name, image_url] pair per entry; "/" characters at either end of the
# image src are stripped.
gen_heros = [
    [entry.text, entry.img["src"].strip("/")]
    for entry in all_hero_list
]

# Attach the type and skin information taken from the JSON records.
combined_heros = merge_hero_info(gen_heros, hero_list)
## Build up index
add_to_index
```
index is
[ [<keyword>, [<hero_detail>, ...]], [<keyword>, [<hero_detail>, ...]], ... ]
keyword: string, url
```
[ [unit, factor], [] ]
[
[keyword], [['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']],
]
## Use list for storage
['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']
0.2 Build up index
add_to_index
index is [
[<keyword>, [<hero_detail>, ...]], [<keyword>, [<hero_detail>, ...]], ...
] keyword: string, url
[ [unit, factor], [] ]
[
[keyword], [['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']],
]
0.3 Use list for storage
['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']
def get_keywords_array(hero):
    """Build the list of search keywords for one hero record.

    Args:
        hero: a ``[hero_name, hero_type, hero_skin, hero_url]`` record.
            ``hero_type`` and ``hero_skin`` are '|'-separated strings.

    Returns:
        The hero name followed by each type and each skin name, in that
        order.  Empty tokens are dropped and each keyword appears only
        once, so indexing the result never files the same hero twice under
        one keyword (e.g. for a type string such as "Warrior|Warrior").
        The URL is not used as a keyword.
    """
    candidates = []
    if hero[0]:
        candidates.append(hero[0])
    if hero[1]:
        candidates += hero[1].split('|')
    if hero[2]:
        candidates += hero[2].split('|')

    keywords = []
    seen = set()
    for word in candidates:
        # Skip empty tokens (from "a||b" or a trailing '|') and repeats,
        # keeping the first occurrence so the original order is preserved.
        if word and word not in seen:
            seen.add(word)
            keywords.append(word)
    return keywords
def add_to_index(index, keyword, info):
    """Add one item to the search index.

    Args:
        index: the search index, a list of ``[keyword, [info, ...]]``
            entries.  It is modified in place.
        keyword: the keyword to file *info* under.
        info: the item to store (a hero detail record when called from
            ``build_up_index``).

    If an entry for *keyword* already exists, *info* is appended to that
    entry's list; otherwise a new ``[keyword, [info]]`` entry is appended
    to *index*.
    """
    for entry in index:
        if entry[0] == keyword:
            entry[1].append(info)
            return
    # Not found: start a new entry for this keyword.
    index.append([keyword, [info]])
def build_up_index(index_array):
    """Fill *index_array* with the search index for every combined hero.

    Each record of the module-level ``combined_heros`` is filed under every
    keyword that ``get_keywords_array`` produces for it.
    """
    for record in combined_heros:
        for term in get_keywords_array(record):
            add_to_index(index_array, term, record)
## Look up information by keywords
def lookup(index, keyword):
    """Search the index for *keyword*.

    Args:
        index: the search index, a list of ``[keyword, [info, ...]]``
            entries.
        keyword: the keyword to look for (exact match).

    Returns:
        The list stored under *keyword*, or ``None`` when the keyword is
        not in the index (including when the index is empty).
    """
    for entry in index:
        if entry[0] == keyword:
            return entry[1]
    # Not found.  Returning entry[0] here would hand back the last keyword
    # of the index and fail outright on an empty index.
    return None
# Build the search index from the combined hero records, then look one hero
# up by name.
search_index = []
build_up_index(search_index)
lookup(search_index, "苏烈")