# Disclaimer: This code is for learning and exchange purposes only. The author
# and anyone who shares it accept no responsibility for malicious use by
# others. Do not modify the rate-limiting parameters without authorization, and
# do not maliciously attack the website; please observe social ethics and the
# law. Whoever runs the crawler is fully responsible for any loss it causes,
# such as a website crash, and will bear criminal liability if serious
# consequences result.
import requests
from lxml import etree
from time import sleep
from fake_useragent import UserAgent
import pandas as pd
# Crawl the paginated Xinfadi market-analysis listing and write the collected
# rows to a CSV file.
#
# Every page is expected to contain a table with class "hq_table"; each row
# after the first (header) row is read as seven cells, in this order: name,
# lowest price, average price, highest price, specification, unit, date.

# Page URL pattern; pages are numbered from 1.
url_base = 'http://www.xinfadi.com.cn/marketanalysis/0/list/{}.shtml'
# Number of listing pages to request.
PAGE_COUNT = 13902
# Seconds to pause after every HTTP request. This is the crawler's rate
# limit -- do not lower it.
REQUEST_DELAY = 3
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 30
# How many times one page is requested before it is skipped.
MAX_ATTEMPTS = 3
# A snapshot of everything collected so far is written every this many pages.
CHECKPOINT_EVERY = 300
# Number of table cells read from each row.
COLUMN_COUNT = 7
OUTPUT_CSV = 'C:\\Users\\Admin\\PycharmProjects\\untitled\\新发地市场价格.csv'

# One accumulator per output column; index k of every list belongs to the
# same table row.
name_all = []
min_price_all = []
mean_price_all = []
max_price_all = []
guige_all = []
unit_all = []
data_all = []


def _fetch_page(url, user_agent):
    """Return the body text of *url*, or None if every attempt failed.

    The rate-limit pause is applied after every attempt, successful or not,
    so retries never hit the server faster than normal requests do.
    """
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = requests.get(
                url,
                headers={'User-Agent': user_agent},
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.text
        except requests.RequestException as exc:
            print('request {}/{} failed for {}: {}'.format(
                attempt, MAX_ATTEMPTS, url, exc))
        finally:
            sleep(REQUEST_DELAY)
    return None


def _parse_rows(html):
    """Return one list of COLUMN_COUNT cell texts per data row in *html*.

    Cells are read row by row so that an empty cell yields '' in its own
    column instead of shifting the values of later rows. Rows with fewer
    than COLUMN_COUNT cells are skipped.
    """
    if not html or not html.strip():
        return []
    tree = etree.HTML(html)
    if tree is None:
        return []
    rows = []
    for tr in tree.xpath("//table[@class='hq_table']/tr[position()>1]"):
        cells = tr.xpath('./td')
        if len(cells) < COLUMN_COUNT:
            continue
        rows.append([''.join(cell.xpath('text()'))
                     for cell in cells[:COLUMN_COUNT]])
    return rows


def _save_snapshot():
    """Write everything collected so far to OUTPUT_CSV, replacing the file."""
    all_info = {
        '名称': name_all,
        '最低价格': min_price_all,
        '平均价格': mean_price_all,
        '最高价格': max_price_all,
        '规格': guige_all,
        '单位': unit_all,
        '日期': data_all,
    }
    pd.DataFrame(all_info).to_csv(OUTPUT_CSV, encoding='GBK')


_columns = (name_all, min_price_all, mean_price_all, max_price_all,
            guige_all, unit_all, data_all)
# Built once; a User-Agent string is still drawn from it for every request.
_ua_source = UserAgent()

try:
    for i in range(PAGE_COUNT):
        print(i)
        page_html = _fetch_page(url_base.format(i + 1), _ua_source.edge)
        if page_html is None:
            print('skipping page {} after {} failed attempts'.format(
                i + 1, MAX_ATTEMPTS))
        else:
            for row in _parse_rows(page_html):
                for column, value in zip(_columns, row):
                    column.append(value)
        if i % CHECKPOINT_EVERY == 0:
            _save_snapshot()
finally:
    # The periodic checkpoint never fires on the last page, so always write
    # once more at the end -- also when the run is interrupted or crashes.
    _save_snapshot()