Python 爬虫系列(3.7-使用 bs4 爬取贵州农产品价格数据)

一、爬取数据步骤

1、爬取网站地址:http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1

2、实现代码

import requests

from bs4 import BeautifulSoup

class Food(object):
    """Scrape one page of the Guizhou agricultural product price listing.

    The page at ``self.url`` is downloaded with ``requests`` and the
    ``table.table-hover`` element is parsed with BeautifulSoup into a list
    of plain dictionaries.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` blocks indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # First page of the price listing for area id 22572.
        self.url = 'http://www.gznw.gov.cn/priceInfo/getPriceInfoByAreaId.jx?areaid=22572&page=1'
        # Chrome User-Agent string sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36',
        }

    @property
    def get_html(self):
        """Download the page and return its HTML text.

        Kept as a property for backward compatibility; note that every
        access performs a network request.

        :return: the response body when the status code is 200,
            otherwise an empty string.
        :raises requests.RequestException: on connection errors or when
            the server does not answer within ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            return response.text
        return ''

    def down_data(self):
        """Parse the price table into a list of records.

        :return: a list of dicts with the keys ``name``, ``price``,
            ``address`` and ``time``, taken from table cells 0, 1, 3 and 4
            of each row. An empty list is returned when the page could not
            be fetched or does not contain the expected table.
        """
        html = self.get_html
        if not html:
            # Non-200 response: there is nothing to parse.
            return []

        soup = BeautifulSoup(html, 'lxml')
        table = soup.find('table', attrs={'class': 'table table-hover'})
        # ``find`` returns None when the element is absent, so guard each
        # step instead of chaining calls on a possible None.
        tbody = table.find('tbody') if table is not None else None
        if tbody is None:
            return []

        food_list = []
        for tr in tbody.find_all('tr'):
            tds = tr.find_all('td')
            if len(tds) < 5:
                # Skip rows that lack the cells indexed below
                # (e.g. header or placeholder rows).
                continue
            food_list.append({
                'name': tds[0].get_text(),
                'price': tds[1].get_text(),
                'address': tds[3].get_text(),
                'time': tds[4].get_text(),
            })
        return food_list

if __name__ == "__main__":
    # Script entry point: scrape the configured page once and print the
    # resulting list of records.
    foo = Food()
    records = foo.down_data()
    print(records)

猜你喜欢

转载自blog.csdn.net/qq_40925239/article/details/83863215