Python 爬取豆果网(菜谱信息)

# -*- coding: utf-8 -*-
# Fetch images from Douguo (douguo.com)
 3 import io
 4 from bs4 import BeautifulSoup
 5 import requests
 6 
 7 url = "https://www.douguo.com/cookbook/2029254.html"
 8 
 9 header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
10 html = requests.get(url,headers = header)
11 text = BeautifulSoup(html.content,"lxml")
12 img_title = text.select("#banner img")
13 imgg = img_title[0].get("src")
14 
15 
def get_img_data(ul, filename="1.jpg", timeout=10):
    """Download the resource at ``ul`` and save its bytes to ``filename``.

    The request is sent with the module-level ``header`` dict as its HTTP
    headers.

    Args:
        ul: URL to download.
        filename: Path of the file to write, opened in binary mode and
            overwritten if it exists. Defaults to "1.jpg".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    htm = requests.get(ul, headers=header, timeout=timeout)
    # Do not save an error response body to disk as if it were the image.
    htm.raise_for_status()
    # The context manager closes the file even if the write fails.
    with open(filename, "wb") as f:
        f.write(htm.content)
# Save the banner image to disk. get_img_data has no return value, so
# menu_img is None; the binding is kept only so the name stays defined.
menu_img = get_img_data(imgg)

# Pull the text fragments out of the parsed page. Each [0] / [1] index raises
# IndexError if the page lacks the expected element.
menu_title_0 = text.select('.title.text-lips')[0].text
menu_intro = text.select('.intro')[0].text
# Query '.mini-title' once; its first two matches are printed as the headings
# of the two lists below.
mini_titles = text.select('.mini-title')
menu_title_1 = mini_titles[0].text
# Presumably ingredient names (scname) and their amounts (scnum) -- the class
# names suggest it; verify against the page markup.
menu_content_scname = text.find_all('span', class_='scname')
menu_content_scnum = text.find_all('span', class_='scnum')
menu_title_2 = mini_titles[1].text
menu_step = text.select('.stepinfo')

print(menu_title_0)
print(menu_intro)
print(menu_title_1)
# Pair the two span lists by position. zip stops at the shorter list, so a
# missing scnum span no longer raises IndexError part-way through the output.
for scname, scnum in zip(menu_content_scname, menu_content_scnum):
    print(scname.text, " ", scnum.text)
print(menu_title_2)
for menu_step_i in menu_step:
    print(menu_step_i.text)
View Code

猜你喜欢

转载自www.cnblogs.com/0526yao/p/10306119.html