Daily web-crawler practice - scraping recipes

Learning to program is like learning to ride a bicycle: for a novice, the most important thing is to keep practicing.
I came across this sentence in a chapter of "underground water": "Do not worry about lacking talent or ability; with constant practice, talent will grow." Thinking about it now, that is indeed the case.

'''
爬虫练习 ---下厨房

version:01
author:金鞍少年
date:2020-02-24

'''

import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class xiachufang():
    """Crawl the first pages of xiachufang.com category 52107 and save each recipe.

    For every recipe entry found on a listing page, one UTF-8 text file is
    written into ``OUTPUT_DIR`` containing the ingredient line and the
    recipe link.
    """

    # Site root; relative recipe links from the listing are resolved against it.
    BASE_URL = 'http://www.xiachufang.com/'
    # Listing URL prefix; the page number is appended to it.
    CATEGORY_URL = 'http://www.xiachufang.com/category/52107/?page='
    # Last listing page that get_url() will request.
    LAST_PAGE = 5
    # Directory that receives the generated .txt files.
    OUTPUT_DIR = './下厨房/'
    # Seconds to wait for the server before giving up on a page.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Number of the next listing page to request (1-based).
        self.count = 1
        # Matches every character that is NOT an ASCII letter, a digit or a
        # CJK ideograph; used to strip punctuation/whitespace from titles so
        # they are safe to use in file names.
        self.comp = re.compile('[^A-Za-z0-9\u4e00-\u9fa5]')
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36',
            'referer': 'http://www.xiachufang.com/category/52107/'
        }

    def get_url(self):
        """Yield one parsed ``BeautifulSoup`` document per successfully fetched page.

        Pages ``self.count`` .. ``LAST_PAGE`` are requested in order. A page
        that cannot be fetched (network error or non-200 status) is reported
        with a message and skipped.
        """
        while self.count <= self.LAST_PAGE:
            url = self.CATEGORY_URL + str(self.count)  # paginated listing URL
            # Incremented before the yield, so while a page is being consumed
            # self.count already holds "page number + 1" (see get_data).
            self.count += 1
            try:
                response = requests.get(
                    url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            except requests.RequestException:
                print('链接失败!')
                continue
            if response.status_code == 200:
                yield BeautifulSoup(response.text, 'html.parser')
            else:
                print('链接失败!')

    def get_data(self, page_data):
        """Yield one tuple per recipe entry found in ``page_data``.

        Each tuple is ``(self.count, index, title, ingredients, url)``.
        ``self.count`` is the page number plus one when this runs inside
        ``fun()``; ``Save_foods`` subtracts the one again.

        Nothing is yielded when the page has no recipe list, and entries
        lacking a link or an ingredient line are skipped.
        """
        recipe_list = page_data.find('div', class_="normal-recipe-list")
        if recipe_list is None:
            return
        entries = recipe_list.find_all("div", class_="info pure-u")
        for index, menu in enumerate(entries):
            tag_a = menu.find('a')
            ingredients_tag = menu.find('p', class_="ing ellipsis")
            if tag_a is None or ingredients_tag is None:
                continue
            # The regex already removes all surrounding whitespace, so no
            # fixed-offset slicing of the link text is needed.
            foods_name = self.comp.sub('', tag_a.text)
            Foodstuff = ingredients_tag.text.strip()
            # urljoin avoids the double slash that plain concatenation gives
            # for hrefs starting with '/'.
            foods_url = urljoin(self.BASE_URL, tag_a['href'])
            yield (self.count, index, foods_name, Foodstuff, foods_url)

    def Save_foods(self, foods):
        """Write one recipe tuple (as produced by ``get_data``) to a text file.

        The file is named ``<page>-<index>-<title>.txt`` inside
        ``OUTPUT_DIR``, which is created if it does not exist yet.
        """
        # foods[0] is "page number + 1"; undo the offset for the file name.
        food_name = '%s-%s-%s' % (foods[0] - 1, foods[1], foods[2])
        food_content = '食材:' + foods[3] + '\n链接:' + foods[4]

        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        file_path = os.path.join(self.OUTPUT_DIR, food_name + '.txt')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(food_content)
        print('下载 %s 成功' % food_name)

    def fun(self):
        """Run the whole crawl: fetch every page and save every recipe on it."""
        for page_data in self.get_url():
            for foods in self.get_data(page_data):
                self.Save_foods(foods)

# Script entry point: build the crawler and run the full fetch-and-save loop.
if __name__ == '__main__':
    xiachufang().fun()


Published 46 original articles · won praise 37 · views 4518

Guess you like

Origin blog.csdn.net/weixin_42444693/article/details/104504180