"""Scrape the Anjuke Fuzhou second-hand housing list page into a CSV file.

Each ``<li class="list-item">`` on the page becomes one CSV row holding the
listing title, total price, average price, area and floor.  Rows are
appended to ``test.csv`` and also echoed to stdout.
"""
import csv
import sys

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the page request.
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36 '
        'Maxthon/5.2.6.1000'
    ),
}
LINK = 'https://fz.anjuke.com/sale/'
OUTPUT_PATH = 'test.csv'
COLUMNS = ('标题', '价格', '均价', '面积', '楼层')
# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10


def fetch_listings():
    """Download the list page and return its ``li.list-item`` elements.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            4xx/5xx response.
    """
    response = requests.get(LINK, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing an error page as if it held listings.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    return soup.find_all('li', class_="list-item")


def parse_listing(house):
    """Return ``[title, price, average price, area, floor]`` for one listing.

    Raises:
        AttributeError, IndexError: if the listing lacks one of the expected
            divs or child nodes.
    """
    name = house.find('div', class_='house-title').a.text.strip()
    # Look each container up once; both fields below read from the same div.
    price_box = house.find('div', class_='pro-price')
    price = price_box.contents[1].text.strip()
    price_ave = price_box.contents[2].text.strip()
    details = house.find('div', class_='details-item')
    area = details.span.text
    floor = details.contents[5].text
    return [name, price, price_ave, area, floor]


def main():
    """Fetch the page and append every parseable listing to the CSV file."""
    house_list = fetch_listings()
    # utf-8-sig prepends a BOM to a new file; newline='' is what the csv
    # module expects so it can control line endings itself.
    with open(OUTPUT_PATH, 'a', newline='', encoding='utf-8-sig') as csvfile:
        w = csv.writer(csvfile)
        # Append mode starts positioned at end-of-file, so offset 0 means the
        # file is new or empty; only then does it need a header row.
        if csvfile.tell() == 0:
            w.writerow(COLUMNS)
        for house in house_list:
            try:
                row = parse_listing(house)
            except (AttributeError, IndexError) as exc:
                # One malformed item should not abort the whole run.
                print('skipping malformed listing:', exc, file=sys.stderr)
                continue
            print(row)
            w.writerow(row)


if __name__ == '__main__':
    main()
几个注意点:
1、打开文件时使用 with open('test.csv', 'a', newline='', encoding='utf-8-sig') as csvfile:,注意必须指定编码为 utf-8-sig(带 BOM 的 UTF-8),否则保存到本地的 CSV 文件在 Excel 等软件中打开时,中文会显示为乱码;newline='' 用于避免写入时出现多余的空行。
2、标题行和数据行都通过 writerow() 写入:标题行传入一个元组,每条房源数据则先组装成一个列表(数组)再传入 writerow()。