爬虫爬取表情

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/zhao_5352269/article/details/83418837
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/10/26 15:27
# @Author  : jia.zhao
# @Desc    : Download meme images from the doutula.com photo list pages via Selenium
# @File    : doutu_request.py
# @Software: PyCharm

import os
import time
import urllib.request

from selenium import webdriver


# Crawl the doutula.com photo list pages (1..99) in a Chrome session and save
# every listed image into IMAGE_DIR, named after the <img> alt text.

# Each listed picture is an <img> element whose `data-original` attribute holds
# the image URL and whose `alt` attribute holds its caption.
IMG_XPATH = (
    '//div[@class="container_"]/div[@id="pic-detail"]/div[@class="row"]'
    '/div[@class="col-sm-9"]/div[@class="random_picture"]'
    '/ul[@class="list-group"]/li[@class="list-group-item"]'
    '/div[@class="page-content text-center"]/div'
    '/a[@class="col-xs-6 col-sm-3"]/img[@data-original]'
)
IMAGE_DIR = 'image'
DOWNLOAD_TIMEOUT = 30  # seconds; keeps one stalled download from hanging the crawl

# The target directory must exist before the first file is opened for writing.
os.makedirs(IMAGE_DIR, exist_ok=True)

browser = webdriver.Chrome()
try:
    for i in range(1, 100):
        browser.get('https://www.doutula.com/photo/list/?page=%s' % str(i))
        # Presumably gives the page time to finish rendering before querying it.
        time.sleep(4)

        # find_elements('xpath', ...) is accepted by both Selenium 3 and 4,
        # unlike find_elements_by_xpath, which Selenium 4.3+ removed.
        for img in browser.find_elements('xpath', IMG_XPATH):
            url = img.get_attribute('data-original')
            if not url:
                continue

            # Drop the "!suffix" part that may follow the real file URL.
            url = url.split('!')[0]
            ext = url.split('.')[-1]
            # Fall back to the URL's file stem when the image has no alt text.
            name = img.get_attribute('alt') or url.split('/')[-1].rsplit('.', 1)[0]
            # Path separators in the caption would point outside IMAGE_DIR.
            filename = (name + '.' + ext).replace('/', '_').replace('\\', '_')

            try:
                with urllib.request.urlopen(url, timeout=DOWNLOAD_TIMEOUT) as resp:
                    data = resp.read()
            except (OSError, ValueError) as err:
                # One broken or malformed URL should not abort the whole crawl.
                print('skip %s: %s' % (url, err))
                continue

            # Save the image data.
            with open(os.path.join(IMAGE_DIR, filename), 'wb') as f:
                f.write(data)

        # Pause between pages.
        time.sleep(5)
finally:
    # quit() closes every window and ends the driver process, even on error.
    browser.quit()

猜你喜欢

转载自blog.csdn.net/zhao_5352269/article/details/83418837