学习python篇之使用python统计中国主要城市房价工资比排行榜

学习python篇之使用python统计中国主要城市房价工资比排行榜

需要用到的模块

requests模块
bs4模块
re模块
openpyxl模块

开发思路

使用python下载网页,永远只有一个最佳的选择--Requests模块
如果尚未安装,可使用以下命令进行安装:
pip install pipenv
pipenv install requests

以下是编写的完整代码

# -*- coding: utf-8 -*-
# @Time : 2021/3/19 9:30
# @Author : 霍義
# @File : fnangjia.py
# @Software : PyCharm
import requests
import bs4
import re
import openpyxl
def open_url(url, timeout=10):
    """Download *url* with a desktop-Chrome User-Agent and return the response.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The ``requests.Response`` of a successful request.

    Raises:
        requests.exceptions.RequestException: On connection failures,
            timeouts, or an HTTP error status (raised by
            ``raise_for_status``).
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    }
    # Without an explicit timeout requests waits indefinitely on a server
    # that never answers.
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail fast on 4xx/5xx instead of handing an error page to the parser.
    res.raise_for_status()

    return res

# Patterns are compiled once here instead of on every record.
_CITY_RE = re.compile(r'\[(.+)\]')
_VALUE_RE = re.compile(r'\d.*')


def find_data(res):
    """Extract one ``[city, price, wage, ratio]`` row per ranked entry.

    The element with id ``Cnt-Main-Article-QQ`` is located and its
    ``<p style="TEXT-INDENT: 2em">`` children are scanned in document order.
    A paragraph whose text is purely numeric starts a record; the four
    paragraphs that follow it supply, in order, the city (the text between
    square brackets) and three values (each taken from its first digit to
    the end of the line).

    Args:
        res: Object exposing the page HTML through a ``text`` attribute
            (``main`` passes the response returned by ``open_url``).

    Returns:
        A list of four-element lists of strings, in page order. Records whose
        fields do not match the expected patterns are skipped, and a record
        cut short by the end of the page is dropped.

    Raises:
        ValueError: If the article container is not present in the page.
    """
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    content = soup.find(id="Cnt-Main-Article-QQ")
    if content is None:
        # soup.find returns None when nothing matches; report that clearly
        # instead of failing later with an opaque AttributeError.
        raise ValueError(
            'article container id="Cnt-Main-Article-QQ" not found in page'
        )

    # A single shared iterator lets the loop body consume the paragraphs
    # that belong to the record it has just recognised.
    paragraphs = iter(content.find_all("p", style="TEXT-INDENT: 2em"))
    data = []
    for each in paragraphs:
        if not each.text.isnumeric():
            continue

        # next() with a default avoids leaking StopIteration out of the
        # function when the page ends in the middle of a record.
        following = [next(paragraphs, None) for _ in range(4)]
        if any(par is None for par in following):
            break

        city_par, *value_pars = following
        city = _CITY_RE.search(city_par.text)
        values = [_VALUE_RE.search(par.text) for par in value_pars]
        if city is None or any(value is None for value in values):
            # Malformed record: skip it rather than crash on ``None.group``.
            continue

        data.append([city.group(1), *(value.group() for value in values)])
    return data

def _to_number(value):
    """Return *value* as ``int``/``float`` if it is a plain numeric string."""
    if not isinstance(value, str):
        return value
    if re.fullmatch(r'[+-]?\d+', value):
        return int(value)
    if re.fullmatch(r'[+-]?\d+\.\d+', value):
        return float(value)
    return value


def to_excel(data, filename="中国主要城市房价工资比排行榜.xlsx"):
    """Write the scraped rows to an Excel workbook.

    Args:
        data: Iterable of rows, each a sequence of cell values matching the
            header (city, average price, average wage, price/wage ratio).
        filename: Path of the ``.xlsx`` file to create; defaults to the
            name the script has always used.

    Cells holding plain numeric strings are stored as numbers so they can be
    sorted and used in formulas. The conversion is done explicitly because
    assigning ``Workbook.guess_types`` is not honoured by current openpyxl
    releases, which left every value stored as text.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(['城市','平均房价','平均工资','房价工资比'])
    for each in data:
        ws.append([_to_number(cell) for cell in each])

    wb.save(filename)

def main():
    """Fetch the ranking article, export it to Excel and dump the raw HTML."""
    page_url = "https://news.house.qq.com/a/20170702/003985.htm"
    response = open_url(page_url)

    # Parse the page and write the spreadsheet in one step.
    to_excel(find_data(response))

    # Keep a copy of the downloaded markup next to the spreadsheet.
    with open("test.txt", mode="w", encoding="utf-8") as dump:
        dump.write(response.text)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/HYXRX/article/details/114999081