[02] Project: a searchable data structure for King of Glory heroes

 

[02] Project: a searchable data structure for King of Glory heroes

1. Goals

1. Download the hero profile data, herolist.json, from https://pvp.qq.com/web201605/js/herolist.json

2. Assume the following hero type identifiers:

hero_type = [ "All", "Warrior", "Mage", "tank", "Assassin", "shooter", "secondary"]

3. Fetch the hero list page from https://pvp.qq.com/web201605/herolist.shtml, which provides each hero's name and picture link

4. Extract each hero's name and avatar link from the web page

5. Combine the two sources of information and build a list of hero details, e.g.

[ ['Chang E', 'Mage|Warrior|tank', 'Hanyue Princess|flower dew reflection', 'game.gtimg.cn/images/yxzj/img201606/heroimg/515/515.jpg'], ['Shangguan Waner', 'Mage|All|Assassin', 'fleeting pen|Xiuzhu letters', 'game.gtimg.cn/images/yxzj/img201606/heroimg/513/513.jpg'] ]

6. Design a data structure that is easy to search, then implement the search function

  def lookup(index,keyword):
          """Stub from the task statement: search `index` for `keyword` (left unimplemented here)."""
          pass

2. Code implementation

import json
from bs4 import BeautifulSoup as bs
import requests

0.1 Database

# Fetch the hero list JSON file from the official website.
r = requests.get('https://pvp.qq.com/web201605/js/herolist.json')
# Fail loudly on an HTTP error instead of caching an error page as hero data.
r.raise_for_status()
hero_list = json.loads(r.text)

# Save the raw response so the data can be reloaded from the local copy.
with open("all_hero.json", 'wt', encoding="utf-8") as fd:
    fd.write(r.text)

# Restore the hero list from the saved file.
with open('all_hero.json', encoding="utf-8") as json_data:
    hero_list = json.load(json_data)
def search_for_hero_info(name=None):
    """Return the first record in the module-level ``hero_list`` whose "cname" equals *name*.

    Records without a "cname" key are skipped. Returns None when nothing
    matches.
    """
    matches = (
        record
        for record in hero_list
        if "cname" in record and record["cname"] == name
    )
    return next(matches, None)
# Display names for hero types. build_hero_type() indexes this list with the
# values stored under a hero record's "hero_type", "new_type" and
# "hero_type2" keys.
hero_type = [ "All", "Warrior", "Mage", "tank", "Assassin", "shooter", "secondary"]
from selenium import webdriver

# Load the hero list page in Chrome and capture the page source.
browser = webdriver.Chrome('./chromedriver')
try:
    browser.get("https://pvp.qq.com/web201605/herolist.shtml")
    html = browser.page_source
finally:
    # Always shut the browser down, even if loading the page fails;
    # otherwise the browser/driver processes are left running.
    browser.quit()

# Save the HTML.
with open("hero_web.html", 'w', encoding="utf-8") as fd:
    fd.write(html)

# Restore the saved HTML.
with open("hero_web.html", 'r', encoding="utf-8") as fd:
    hero_html = fd.read()
def build_hero_type(hero):
    """Join the display names of a hero's types with "|".

    The keys "hero_type", "new_type" and "hero_type2" are checked in that
    order; the value of each key present in *hero* is used as an index into
    the module-level ``hero_type`` list.
    """
    type_keys = ("hero_type", "new_type", "hero_type2")
    labels = [hero_type[hero[key]] for key in type_keys if key in hero]
    return '|'.join(labels)
# Each merged record is [hero_name, hero_type, hero_skin, hero_url].
def merge_hero_info(hero_html, hero_json):
    """Merge the heroes scraped from the web page with their JSON details.

    Args:
        hero_html: iterable of [name, image_url] pairs taken from the page.
        hero_json: unused; details are looked up via search_for_hero_info().
            Kept so existing callers keep working.

    Returns:
        A list of [hero_name, hero_type, hero_skin, hero_url] records, one
        per entry of hero_html, in the same order.
    """
    all_heros = []
    for hero in hero_html:
        name, url = hero[0], hero[1]
        hero_detail = search_for_hero_info(name)
        if hero_detail is None:
            # No JSON record for this name: keep the hero searchable by name
            # instead of crashing on None.
            all_heros.append([name, "", "", url])
            continue
        # Tolerate records without "skin_name"; get_keywords_array() already
        # ignores an empty skin field.
        # NOTE(review): the strip() argument was split across two lines in
        # the source; "\n'" (newline and single quote) is the reconstruction
        # - confirm.
        skin = hero_detail.get("skin_name", "").strip("\n'")
        all_heros.append([name, build_hero_type(hero_detail), skin, url])
    return all_heros

 

# Parse the hero list page: find <ul class="herolist"> and collect
# [text, image src] for each <li> inside it.
# NOTE(review): this parses `html` from the live browser session, not the
# `hero_html` copy restored from hero_web.html - confirm that is intended.
hero_soup = bs(html,'lxml')
hero_html_list=hero_soup.find("ul",class_="herolist")
all_hero_list =hero_html_list.find_all("li")
# strip("/") removes leading and trailing slashes from the image src.
gen_heros=[[info.text, info.img["src"].strip("/")] for info in all_hero_list]

 

# Merge the scraped [name, image] pairs with the JSON hero details.
combined_heros = merge_hero_info(gen_heros, hero_list)
## Build up index

add_to_index

```
index is
[ [<keyword>, [<hero_detail>, ...]], [<keyword>, [<hero_detail>, ...]], ... ]
(<keyword> is a string; each <hero_detail> ends with the image URL)
```

[ [unit, factor], [] ]

[ [<keyword>, [ ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'] ]], ]

## Use list for storage





















[ 'Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']

02 Build up index

add_to_index

index is [

[<keyword>,[<hero_detail>,...]], [<keyword>,[<hero_detail>,...]], ...

] where <keyword> is a string and each <hero_detail> ends with the image URL

[ [unit,factor],[] ]

[

[<keyword>, [ ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'] ]],

]

03 Use list for storage

[ 'Barry Yuen policy', 'Assassin | All', 'mad clamor of sickle | Venice Carnival', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg' ]

def get_keywords_array(hero):
    """Build the list of search keywords for one hero record.

    *hero* is laid out as [hero_name, hero_type, hero_skin, hero_url].
    The name is used as a single keyword, while the type and skin fields
    are split on "|" into one keyword each. Empty (falsy) fields contribute
    nothing, and the URL is never used as a keyword.
    """
    name, type_field, skin_field = hero[0], hero[1], hero[2]
    keywords = [name] if name else []
    for field in (type_field, skin_field):
        if field:
            keywords.extend(field.split('|'))
    return keywords
def add_to_index(index, keyword, info):
    """Add *info* to the search index under *keyword*.

    *index* is a list of ``[keyword, [info, ...]]`` entries and is modified
    in place. If an entry for *keyword* already exists, *info* is appended
    to its list; otherwise a new entry is appended to *index*.
    """
    for entry in index:
        if entry[0] == keyword:
            entry[1].append(info)
            return
    # Keyword not found: start a new entry.
    index.append([keyword, [info]])
def build_up_index(index_array):
    """Populate *index_array* with the search index for all heroes.

    Every keyword produced by get_keywords_array() for each record in the
    module-level ``combined_heros`` is registered through add_to_index().
    """
    for record in combined_heros:
        for term in get_keywords_array(record):
            add_to_index(index_array, term, record)
# Look up information by keywords
def lookup(index, keyword):
    """Search the index for *keyword*.

    *index* is a list of ``[keyword, [info, ...]]`` entries. Returns the
    info list stored under the first entry whose keyword equals *keyword*,
    or an empty list when the keyword is not indexed (including when
    *index* is empty).
    """
    for entry in index:
        if entry[0] == keyword:
            return entry[1]
    # Not found: return an empty result rather than data from an unrelated
    # entry, so callers always receive a list.
    return []
# Build the search index into a fresh list.
search_index=[]
build_up_index(search_index)

# Example query: look up the records indexed under one hero name.
lookup(search_index,"苏烈")

 

Guess you like

Origin www.cnblogs.com/Lilwhat/p/12431056.html