Xinfadi market price crawling

Disclaimer: The code is only for learning and communication purposes. The sharer and creator of the code do not assume any responsibility caused by the malicious operation of others. Do not modify the frequency-limiting parameters without authorization, do not maliciously attack the webpage, please learn to observe the social ethics and legal order , The computer operator shall be fully responsible for the loss of webpage crashes caused by the crawler, and the criminal responsibility shall be borne if serious consequences are caused.

import requests
from lxml import etree
from time import sleep
from fake_useragent import UserAgent
import pandas as pd

name_all = []
min_price_all = []
mean_price_all = []
max_price_all = []
guige_all = []
unit_all = []
data_all = []


for i in range(13902):
    print(i)
    UA = UserAgent().edge
    url_base='http://www.xinfadi.com.cn/marketanalysis/0/list/{}.shtml'
    url = 'http://www.xinfadi.com.cn/marketanalysis/0/list/{}.shtml'.format(i+1)
    headers={
    
    
        'User-Agent':UA
    }
    response=requests.get(url,headers=headers)
    sleep(3)
 #   print(response.text)
 #   print(response.encoding)
    e = etree.HTML(response.text)
    name = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[1]/text()''')
    min_price = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[2]/text()''')
    mean_price = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[3]/text()''')
    max_price = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[4]/text()''')
    guige = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[5]/text()''')
    unit = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[6]/text()''')
    data = e.xpath('''//table[@class='hq_table']/tr[position()>1]/td[7]/text()''')

    name_all = name_all+name
    min_price_all = min_price_all+min_price
    mean_price_all = mean_price_all+mean_price
    max_price_all=max_price_all+max_price
    guige_all = guige_all+guige
    unit_all = unit_all+unit
    data_all=data_all+data
    if i % 300 == 0:
        all_info = {
    
    
                    '名称': name_all,
                    '最低价格': min_price_all,
                    '平均价格': mean_price_all,
                    '最高价格': max_price_all,
                    '规格': guige_all,
                    '单位':unit_all,
                    '日期':data_all
                }
        outdata = pd.DataFrame(all_info)
        outdata.to_csv('C:\\Users\\Admin\\PycharmProjects\\untitled\\新发地市场价格.csv', encoding='GBK')




Guess you like

Origin blog.csdn.net/qq_42830971/article/details/111833036