import requests
from bs4 import BeautifulSoup
# 1. Send the request
def get_page(url, timeout=10):
    """Fetch ``url`` with an HTTP GET request and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection, so a finite default is used.

    Returns:
        The response object produced by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    return requests.get(url, timeout=timeout)
# Parse the index page
def parse_index(data):
    """Print the name, download count, size and detail URL of each app card.

    Every field is looked up inside its own ``<li class="card">`` element,
    so each card reports its own values rather than those of the first card
    in the document. A field whose tag is missing is printed as an empty
    string instead of aborting the whole page.

    Args:
        data: HTML markup containing one ``<li class="card">`` per app.
    """
    # Imported locally because the module does not import ``re`` at file level.
    import re

    # Compiled once, outside the loop; matches text containing digits + "MB".
    size_pattern = re.compile(r"\d+MB")

    soup = BeautifulSoup(data, 'lxml')
    # Collect the <li> tag of every app.
    for app in soup.find_all(name='li', attrs={"class": "card"}):
        # App name: text of this card's <a class="name"> tag.
        print(_tag_text(app.find(name="a", attrs={"class": "name"})))

        # Download count: text of this card's <span class="install-count">.
        print(_tag_text(app.find(name='span', attrs={"class": "install-count"})))

        # Size: text of this card's <span> whose string matches the pattern.
        print(_tag_text(app.find(name='span', string=size_pattern)))

        # Detail page address: href of the first <a> inside the card.
        link = app.find(name='a')
        print(link.get('href', '') if link is not None else '')


def _tag_text(tag):
    """Return the text of ``tag``, or an empty string when ``tag`` is None."""
    return tag.text if tag is not None else ''
def main():
    """Walk API pages 1 through 32 and print the apps found on each page.

    For every page the category API URL is requested through ``get_page``,
    a separator line is printed, the JSON body is decoded, and the markup
    stored under ``data`` -> ``content`` is handed to ``parse_index``.
    """
    for page in range(1, 33):
        url = f"https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page={page}&ctoken=FRsWKgWBqMBZLdxLaK4iem9B"
        # Step 1: send the request to the app listing API.
        response = get_page(url)
        # Visual separator between pages.
        print('*' * 1000)
        # Deserialize the JSON body into a dict.
        payload = response.json()
        # Step 2: parse the app markup carried in the API payload.
        parse_index(payload['data']['content'])
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()