Python 2 下载汽车之家 2018 年款高清图片

Python 2 下载汽车之家 2018 年款高清图片

import urllib2
import os
from bs4 import BeautifulSoup
import random
import urllib
import time
# Initial letters of the index pages to crawl.  'J' is deliberately absent:
# per the original author's note, that page comes out garbled when parsed with
# 'html.parser'.  To fetch it, run with end = ['J'] and call
# _fetch_soup(index_url, parser=None) in main() so that BeautifulSoup picks
# its default parser instead.
end = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O',
       'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

INDEX_URL_TEMPLATE = "https://www.autohome.com.cn/grade/carhtml/%s.html"
SITE_ROOT = 'https://car.autohome.com.cn'
OUTPUT_ROOT = '2018_year_cartype/'
MODEL_YEAR = '2018'
# Pause (seconds) before each sub-type page request and each image download.
REQUEST_DELAY = 0.3
# Only the first few photo links of a sub-type page are followed; the original
# author's note says these are the front-view shots.
PHOTOS_PER_TYPE = 3


def _read_url(url):
    """Return the raw response body of *url*, closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _fetch_soup(url, parser='html.parser'):
    """Download *url* and return it parsed as a BeautifulSoup document.

    parser -- name of the BeautifulSoup parser to use; None lets
              BeautifulSoup choose its default one.
    """
    html = _read_url(url)
    if parser is None:
        return BeautifulSoup(html)
    return BeautifulSoup(html, parser)


def _absolute_url(href, root=SITE_ROOT):
    """Turn an href/src attribute value into an absolute URL.

    Absolute http(s) URLs are returned unchanged, protocol-relative ones
    ('//host/path') get an 'https:' scheme, and anything else is appended to
    *root*.  Checking the prefix (rather than searching for 'https:' anywhere
    in the string) keeps 'http://' URLs and URLs that merely contain
    'https:' further along from being mangled.
    """
    if href.startswith(('http://', 'https://')):
        return href
    if href.startswith('//'):
        return 'https:' + href
    return root + href


def _index_of_model_year(year_tags):
    """Return the position of the first tag whose text mentions MODEL_YEAR.

    Returns None when no tag matches.
    """
    for position, tag in enumerate(year_tags):
        if MODEL_YEAR in tag.text:
            return position
    return None


def _download_type_photos(type_url, target_dir):
    """Save the first PHOTOS_PER_TYPE photos linked from one sub-type page.

    type_url   -- absolute URL of the sub-type photo listing page
    target_dir -- existing directory the images are written to
    """
    type_page = _fetch_soup(type_url)
    photo_links = type_page.find_all(
        'a', attrs={'href': True, 'title': True, 'target': "_blank"})
    for link in photo_links[:PHOTOS_PER_TYPE]:
        detail_page = _fetch_soup(_absolute_url(link.get('href')))
        image = detail_page.find_all(
            'img', attrs={"id": 'img', 'src': True})[0]
        photo_url = _absolute_url(image.get('src'))
        # File names are random, exactly as before, so a re-run adds new
        # files instead of overwriting the ones already downloaded.
        file_name = str(random.uniform(0, 30)) + '.jpg'
        time.sleep(REQUEST_DELAY)
        urllib.urlretrieve(photo_url, target_dir + '/' + file_name)


def _download_series(name, series_url):
    """Download the MODEL_YEAR photos of one car series.

    name       -- series name, used as the output sub-directory name
    series_url -- absolute URL of the series photo page

    Does nothing when the page lists no MODEL_YEAR entry.  Any network or
    parsing error propagates to the caller.
    """
    series_page = _fetch_soup(series_url)
    listing = series_page.find_all(
        'dl', attrs={'class': "search-pic-cardl"})[0]
    position = _index_of_model_year(listing.find_all('dt'))
    if position is None:
        return

    target_dir = OUTPUT_ROOT + name
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Assumes the n-th <ul> under the listing belongs to the n-th <dt>, as
    # the original indexing did -- TODO confirm against the live markup.
    type_list = listing.find_all('ul')[position]
    for link in type_list.find_all('a'):
        time.sleep(REQUEST_DELAY)
        _download_type_photos(_absolute_url(link.get('href')), target_dir)


def main():
    """Crawl every index page listed in `end` and download the photos.

    A series that fails for any reason is reported (its name followed by
    the error) and skipped, so one broken page does not stop the crawl.
    """
    print('ok')
    for letter in end:
        index_page = _fetch_soup(INDEX_URL_TEMPLATE % letter)
        headings = index_page.find_all('h4')
        anchors = index_page.find_all('a', attrs={'id': True})
        # Names and links are paired purely by position, as in the original
        # script; zip() stops at the shorter list instead of raising
        # IndexError when the two counts differ.
        for heading, anchor in zip(headings, anchors):
            name = heading.text
            try:
                _download_series(name, _absolute_url(anchor.get('href')))
            except Exception as exc:
                # Best effort: unlike a bare "except:", this still lets
                # Ctrl-C (KeyboardInterrupt) and SystemExit stop the run.
                print(name)
                print(repr(exc))
    print('end')


if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/qq_34496674/article/details/88312614