Python之requests爬取网页数据案例。

requests模块:第三方模块,需要先安装才能使用,用来发送HTTP请求并获取网页内容。

BeautifulSoup模块:也是第三方模块,用来解析通过requests获取到的网页内容,并从中提取所需数据。

案例说明:通过输入查询软件名称爬取所查询软件的下载量信息。

案例代码:

import requests
from bs4 import BeautifulSoup


def zhushou(name):
    """Print the download count of ``name`` found on 360 Mobile Assistant.

    Fetches the search results page for ``name`` and prints the text of the
    first ``p.downNum`` element matched by the selector, with its last three
    characters removed. If no element matches, a "not found" message is
    printed instead.

    Args:
        name: Application name to search for.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Pass the keyword through ``params`` so requests URL-encodes it; plain
    # concatenation corrupts the query for names containing '&', '#' or spaces.
    webData = requests.get('http://zhushou.360.cn/search/index/',
                           params={'kw': name}, timeout=10)
    webData.raise_for_status()
    # Parse the returned HTML.
    soup = BeautifulSoup(webData.text, 'lxml')
    # select_one returns the first matching element, or None when the search
    # produced no results (or the page layout changed).
    node = soup.select_one('body > div.warp > div.main > div > ul > li> div > div.sdlft > p.downNum')
    if node is None:
        print('在360手机助手上,未找到%s的下载量信息。' % name)
        return
    # Drop the trailing three characters of the text (presumably a fixed
    # suffix after the number -- confirm against the live page).
    downloads = node.get_text()[:-3]
    print('在360手机助手上,%s的下载量:%s。' % (name, downloads))


def yingyonghui(name):
    """Print the download count and update date of ``name`` found on AppChina.

    Fetches the search results page for ``name`` and prints the text of the
    first ``span.download-count`` and ``span.update-date`` elements matched by
    the selectors. If either element is missing, a "not found" message is
    printed instead.

    Args:
        name: Application name to search for.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Pass the keyword through ``params`` so requests URL-encodes it; plain
    # concatenation corrupts the query for names containing '&', '#' or spaces.
    webData = requests.get('http://www.appchina.com/sou/',
                           params={'keyword': name}, timeout=10)
    webData.raise_for_status()
    soup = BeautifulSoup(webData.text, 'lxml')

    # select_one returns the first matching element, or None when nothing
    # matches (no search results, or the page layout changed).
    downloads_node = soup.select_one('#left > ul > li> div.app-info > span.download-count')
    update_node = soup.select_one('#left > ul > li> div.app-info > span.update-date')
    if downloads_node is None or update_node is None:
        print('在应用汇上,未找到%s的下载量信息。' % name)
        return
    downloads = downloads_node.get_text()
    update = update_node.get_text()
    print('在应用汇上,%s的下载量:%s,%s。' % (name, downloads, update))


def anzhuo(name):
    """Print the download count and package size of ``name`` from the Android market.

    Fetches the shouji.baidu.com search results page for ``name`` and prints
    the text of the first download-count element (with its last three
    characters removed) and the first ``span.size`` element matched by the
    selectors. If either element is missing, a "not found" message is printed
    instead.

    Args:
        name: Application name to search for.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Let requests build and URL-encode the query string; it encodes '@' as
    # '%40', matching the hand-written 'header_app%40input' of the raw URL,
    # and it also escapes '&', '#' and spaces inside the user-supplied name.
    query = {'wd': name, 'data_type': 'app', 'f': 'header_app@input'}
    webData = requests.get('https://shouji.baidu.com/s', params=query, timeout=10)
    webData.raise_for_status()
    soup = BeautifulSoup(webData.text, 'lxml')
    # select_one returns the first matching element, or None when nothing
    # matches (no search results, or the page layout changed).
    downloads_node = soup.select_one(
        '#doc > div.yui3-g > div > div > ul > li > div > div.info > div:nth-child(3) > em > span')
    size_node = soup.select_one(
        '#doc > div.yui3-g > div > div > ul > li > div > div.info > div:nth-child(3)'
        ' > span.size')
    if downloads_node is None or size_node is None:
        print('在安卓市场上,未找到%s的下载量信息。' % name)
        return
    # Drop the trailing three characters of the text (presumably a fixed
    # suffix after the number -- confirm against the live page).
    downloads = downloads_node.get_text()[:-3]
    downsize = size_node.get_text()
    print('在安卓市场上,%s的下载量:%s,%s。' % (name, downloads, downsize))


if __name__ == '__main__':
    # Ask once for the application name, then query each store in turn.
    name = input('请输入您要查询的软件:')

    for lookup in (zhushou, yingyonghui, anzhuo):
        lookup(name)

执行结果:

猜你喜欢

转载自blog.csdn.net/qq_39979646/article/details/104510843
今日推荐