【Python】抓取电影海报并下载

关于BeautifulSoup   :
soup.find_all(class_='v_picConBox mt15')
tag.find("div",{"class": "pic"})  
pic.img['data-src']
title =text.span.em.a['title']
#coding=utf-8
import re,os
from urllib.request  import urlretrieve 
import requests
from bs4 import BeautifulSoup

def geturl(url):
    html=requests.get(url).content
    soup=BeautifulSoup(html,'lxml')
    return soup

#抓取电影海报
soup=geturl('http://dianying.2345.com/list/kehuan------.html')
bookAlbum=soup.title.string.split('_')[0]
# print(bookAlbum)
tags=soup.find_all(class_='v_picConBox mt15')
# print(tags)
movies=[]
for tag in tags[0].find_all('li'): 
	pic=  tag.find("div",{"class": "pic"})  
	if  pic is None:
		continue
	text = tag.find("div",{"class": "txtPadding"}) 
	img_url = pic.img['data-src']
	title =text.span.em.a['title']
	# print(title,img_url)
	movies.append([title,img_url])
	print(movies)
if not os.path.exists(bookAlbum):
    os.makedirs(bookAlbum)
    for movie in movies:
        filename=os.path.join(bookAlbum,movie[0]+'.png')
        print(filename)
        with open(filename,'w') as f:
            urlretrieve('http:'+movie[1],filename)
            

 

猜你喜欢

转载自blog.csdn.net/Allure_LoveU/article/details/80689779