requests模块:第三方模块,需要先安装才能使用,用来发送HTTP请求并获取网页内容。
BeautifulSoup模块:也是第三方模块,用来解析通过requests获取到的网页内容,并从中读取所需的数据(本例使用lxml解析器,同样需要先安装)。
案例说明:通过输入查询软件名称爬取所查询软件的下载量信息。
案例代码:
import requests
from bs4 import BeautifulSoup
def zhushou(name, timeout=10):
    """Look up an app on 360 Mobile Assistant and print its download count.

    Sends a search request to zhushou.360.cn using ``name`` as the keyword,
    reads the download figure of the first search result and prints it.

    Args:
        name: Search keyword (the app name).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build the query string so the keyword is URL-encoded;
    # plain concatenation breaks the query for names containing '&', '#', etc.
    response = requests.get(
        'http://zhushou.360.cn/search/index/',
        params={'kw': name},
        timeout=timeout,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    # select_one() yields the first matching tag, or None when the result
    # list is empty, instead of raising IndexError like select(...)[0].
    hit = soup.select_one('body > div.warp > div.main > div > ul > li> div > div.sdlft > p.downNum')
    if hit is None:
        print('在360手机助手上,未找到%s的下载量信息。' % name)
        return

    # The last three characters of the element text are dropped; presumably a
    # fixed trailing label after the number -- confirm against the live page.
    downloads = hit.get_text()[:-3]
    print('在360手机助手上,%s的下载量:%s。' % (name, downloads))
def yingyonghui(name, timeout=10):
    """Look up an app on AppChina and print its download count and update text.

    Sends a search request to www.appchina.com using ``name`` as the keyword,
    then prints the text of the first result's ``span.download-count`` and
    ``span.update-date`` elements.

    Args:
        name: Search keyword (the app name).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build the query string so the keyword is URL-encoded;
    # plain concatenation breaks the query for names containing '&', '#', etc.
    response = requests.get(
        'http://www.appchina.com/sou/',
        params={'keyword': name},
        timeout=timeout,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    # select_one() yields the first matching tag, or None when nothing
    # matches, instead of raising IndexError like select(...)[0].
    downloads_tag = soup.select_one('#left > ul > li> div.app-info > span.download-count')
    update_tag = soup.select_one('#left > ul > li> div.app-info > span.update-date')
    if downloads_tag is None or update_tag is None:
        print('在应用汇上,未找到%s的下载量信息。' % name)
        return

    downloads = downloads_tag.get_text()
    update = update_tag.get_text()
    print('在应用汇上,%s的下载量:%s,%s。' % (name, downloads, update))
def anzhuo(name, timeout=10):
    """Look up an app on the Android market and print its downloads and size text.

    Sends a search request to shouji.baidu.com using ``name`` as the keyword,
    then prints the download figure of the first result together with the
    text of its ``span.size`` element.

    Args:
        name: Search keyword (the app name).
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build the query string so the keyword is URL-encoded;
    # plain concatenation breaks the query for names containing '&', '#', etc.
    # '@' is percent-encoded by requests, giving the same 'header_app%40input'.
    response = requests.get(
        'https://shouji.baidu.com/s',
        params={'wd': name, 'data_type': 'app', 'f': 'header_app@input'},
        timeout=timeout,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    # Both values are children of the same element of each result entry.
    info_row = '#doc > div.yui3-g > div > div > ul > li > div > div.info > div:nth-child(3)'
    # select_one() yields the first matching tag, or None when nothing
    # matches, instead of raising IndexError like select(...)[0].
    downloads_tag = soup.select_one(info_row + ' > em > span')
    size_tag = soup.select_one(info_row + ' > span.size')
    if downloads_tag is None or size_tag is None:
        print('在安卓市场上,未找到%s的下载量信息。' % name)
        return

    # The last three characters of the element text are dropped; presumably a
    # fixed trailing label after the number -- confirm against the live page.
    downloads = downloads_tag.get_text()[:-3]
    downsize = size_tag.get_text()
    print('在安卓市场上,%s的下载量:%s,%s。' % (name, downloads, downsize))
if __name__ == '__main__':
    # Script entry point: ask for an app name and query each store in turn.
    # Surrounding whitespace is stripped so it does not leak into the keyword.
    name = input('请输入您要查询的软件:').strip()
    # Each store is queried independently: a network error or an empty result
    # page on one site must not prevent the remaining sites from being tried.
    for lookup in (zhushou, yingyonghui, anzhuo):
        try:
            lookup(name)
        except (requests.RequestException, IndexError) as err:
            print('%s查询失败:%s' % (lookup.__name__, err))
执行结果: