一、 使用Python爬取房价信息
1. 话不多说,先看代码
from bs4 import BeautifulSoup
import requests
class SpiderHours(object):
    """Scrape the city house-price ranking page and append it to a CSV file.

    The scraper downloads ``self.url``, collects the text of every ``<th>``
    element on the page, groups the cells into fixed-width rows and appends
    those rows to ``2019年全国房价.csv`` in the current working directory.
    """

    def __init__(self):
        """Set the target URL and the HTTP headers sent with the request."""
        self.url = "http://www.creprice.cn/rank/cityforsale.html"
        # NOTE(review): the Cookie value is a hard-coded browser session
        # capture; presumably it expires and must be refreshed - confirm.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/74.0.3729.169 Safari/537.36"
            ),
            "Cookie": (
                "cityredata=1245960d33d174122151e3280a2630a2; "
                "city=sq; "
                "userchannel=EL-cityhouse; "
                "Hm_lvt_c2a7a3cec6f9dd8849155424efab19c7="
                "1562643454,1562643606,1562644057; "
                "Hm_lpvt_c2a7a3cec6f9dd8849155424efab19c7=1562644057"
            ),
        }

    @staticmethod
    def _group_rows(cells, width=5):
        """Split a flat list of cell strings into rows of ``width`` cells.

        Args:
            cells: Flat list of cell texts in document order.
            width: Number of cells per row (the table has five columns).

        Returns:
            A list of rows, each a list of at most ``width`` cells. The final
            row is always included; it is shorter than ``width`` only when
            ``len(cells)`` is not a multiple of ``width``.
        """
        return [cells[start:start + width]
                for start in range(0, len(cells), width)]

    def request_page(self, timeout=10):
        """Download the ranking page and append its table rows to the CSV.

        Each row is also printed to stdout. Nothing is written (and the file
        is not created) when the page contains no ``<th>`` cells.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: On connection failure, timeout, or a
                non-2xx HTTP status.
        """
        # A timeout keeps the script from hanging forever on a dead server.
        response = requests.get(self.url, headers=self.headers, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        html = response.content.decode("utf-8")
        soup = BeautifulSoup(html, features="lxml")

        # Strip surrounding whitespace/newlines and drop thousands separators
        # (",") so a cell can never break the comma-delimited output.
        cells = [th.text.strip().replace(",", "") for th in soup.find_all("th")]

        rows = self._group_rows(cells)
        if not rows:
            return

        # Open the file once rather than once per row, and pin the encoding:
        # the text was decoded as UTF-8, so a platform default codec might be
        # unable to encode it.
        with open("2019年全国房价.csv", "a", encoding="utf-8") as f:
            for row in rows:
                line = ",".join(row)
                f.write(line + "\n")
                print(line)
if __name__ == "__main__":
    # Script entry point: build the scraper and run a single scrape.
    SpiderHours().request_page()
获取到的数据:
序号,城市名称,平均单价(元/㎡),同比,环比
1,北京,63905,-0.23%,+0.2%
2,深圳,62987,+12.49%,+0.89%
3,上海,51877,-0.85%,-0.48%
4,厦门,45257,+2.61%,+1.84%
5,三亚,34185,-10.67%,-1.74%
6,广州,33858,+1.62%,+2.17%
7,南京,30540,+5.21%,-0.86%
8,杭州,30248,-6.3%,-4.26%
9,陵水,29052,-16.47%,-4.77%
10,福州,26145,-5.61%,-2.46%
…