# Copyright notice: this is an original article by the blog author and may not be reproduced without the author's permission. https://blog.csdn.net/zhao_5352269/article/details/83418837
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2018/10/26 15:27
# @Author : jia.zhao
# @Desc :
# @File : doutu_request.py
# @Software: PyCharm
import os
import time
import urllib.request

from selenium import webdriver
# Scrape images from the doutula.com photo list.
#
# A Selenium-driven Chrome session walks list pages 1..99. On every page each
# <img> carrying a ``data-original`` attribute is downloaded with urllib and
# written to OUTPUT_DIR, named "<alt text>.<extension taken from the URL>".

# Directory the images are written to; created on start-up if missing.
OUTPUT_DIR = 'image'

# Locates every <img> on a list page that has a ``data-original`` attribute.
IMG_XPATH = (
    '//div[@class="container_"]'
    '/div[@id="pic-detail"]'
    '/div[@class="row"]'
    '/div[@class="col-sm-9"]'
    '/div[@class="random_picture"]'
    '/ul[@class="list-group"]'
    '/li[@class="list-group-item"]'
    '/div[@class="page-content text-center"]'
    '/div'
    '/a[@class="col-xs-6 col-sm-3"]'
    '/img[@data-original]'
)

os.makedirs(OUTPUT_DIR, exist_ok=True)

browser = webdriver.Chrome()
try:
    for page in range(1, 100):
        browser.get('https://www.doutula.com/photo/list/?page=%s' % page)
        # Fixed wait after navigation before the image elements are queried.
        time.sleep(4)

        # 'xpath' is the locator-strategy string (the value of By.XPATH);
        # find_elements(by, value) works on both Selenium 3 and Selenium 4,
        # whereas find_elements_by_xpath no longer exists in Selenium 4.3+.
        for img in browser.find_elements('xpath', IMG_XPATH):
            url = img.get_attribute('data-original')
            if not url:
                # Nothing to download for this element.
                continue

            # Drop everything from the first '!' onwards before downloading.
            url = url.split('!')[0]
            ext = url.split('.')[-1]

            # Name the file after the alt text; if it is missing, fall back to
            # the URL's base name so the image is still saved.
            name = img.get_attribute('alt')
            if not name:
                name = url.split('/')[-1].rsplit('.', 1)[0]
            # Path separators in the alt text would point outside OUTPUT_DIR
            # or at a non-existent sub-directory.
            name = name.replace('/', '_').replace('\\', '_')
            filename = name + '.' + ext

            # Save the image data.
            with urllib.request.urlopen(url) as response:
                data = response.read()
            with open(os.path.join(OUTPUT_DIR, filename), 'wb') as f:
                f.write(data)

        # Pause before requesting the next page.
        time.sleep(5)
finally:
    # Always shut the driver down, even if a page load or download failed;
    # quit() closes every window and ends the WebDriver session.
    browser.quit()