Python 爬取煎蛋网妹子图(图片地址已解密)

本来想爬一波无聊图,结果发现图片地址竟然被加密了。

还好只是 base64 编码,直接解码即可得到图片地址。

不说了,代码献上(2018.12.14 测试有效)。

import base64
import os
import time

import requests
from bs4 import BeautifulSoup

# Phase 1: walk the listing pages and collect the text of every candidate
# <p> element; the next phase attempts to base64-decode each entry.
base64_list = []
print('====开始爬取=====')
starttime = time.time()
# Browser-like User-Agent, built once instead of on every loop iteration.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}
for i in range(50, 79):
    url = 'http://jandan.net/ooxx/page-{}#comments'.format(i)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException as err:
        print('页面请求失败,已跳过: {} ({})'.format(url, err))
        continue
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'lxml')
    ol = soup.find(name='ol', attrs={"class": "commentlist"})
    if ol is None:
        # find() returns None when the page has no comment list; skip the
        # page instead of crashing on ol.find_all below.
        print('未找到评论列表,已跳过: {}'.format(url))
        continue
    for li in ol.find_all(name='li'):
        p = li.find(name='p')
        if p is None:
            # This <li> has no <p> child, so there is nothing to collect.
            continue
        text = p.text
        # Multi-line text is skipped, exactly as the original filter did
        # (presumably ordinary comment text rather than an encoded address).
        if '\n' in text:
            continue
        base64_list.append(text)
endtime = time.time()
inttime = endtime - starttime
print('=====爬取结束====\n用时{}秒'.format(inttime))
# Phase 2: base64-decode every collected entry into a downloadable URL.
print('=====开始解析====')
full_url = []  # fully decoded image URLs
for www in base64_list:
    try:
        decoded = base64.b64decode(www).decode()
    except ValueError:
        # binascii.Error (bad base64) and UnicodeDecodeError (decoded bytes
        # are not text) are both ValueError subclasses; report the entry
        # that failed instead of printing a bare "1".
        print('跳过无法解码的条目: {!r}'.format(www))
        continue
    if not decoded:
        # An empty payload would produce the bare URL 'http:', which makes
        # the later download request fail.
        continue
    # The decoded value is appended to the 'http:' scheme prefix unchanged.
    full_url.append('http:' + decoded)
print('=====解析完毕=====')
# Phase 3: download every decoded URL into jdimg/<index>.jpg.
print('===载入本地文件夹===')

# open() does not create missing directories, so make sure the target exists.
os.makedirs('jdimg', exist_ok=True)

image_start_time = time.time()
for index, item in enumerate(full_url):
    full_path = 'jdimg/' + str(index) + '.jpg'
    try:
        resp = requests.get(item, timeout=10)
        # Do not save an HTTP error page under a .jpg name.
        resp.raise_for_status()
    except requests.RequestException as err:
        # One bad URL or dropped connection should not abort the rest.
        print('下载失败,已跳过: {} ({})'.format(item, err))
        continue
    with open(full_path, 'wb') as f:
        f.write(resp.content)
end_time_time = time.time()
# '%.2f' prints two decimal places; the original '%.2s' cut the number's
# string form down to its first two characters.
print('用时%.2f秒' % (end_time_time - image_start_time))

猜你喜欢

转载自www.cnblogs.com/xxy614899502/p/10120209.html
今日推荐