爬虫爬妹子图

代码,待优化

import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "http://di81.com"
LIST_URL = BASE_URL + "/PicList?pageindex=1"
SAVE_DIR = r'D:\05_Code\SpiderTest\img'
# requests has no default timeout; without one a stalled server hangs forever.
REQUEST_TIMEOUT = 10

# Characters Windows rejects in file names, plus line breaks and tabs that
# can leak in from the link text.
_INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def sanitize_filename(name):
    """Return *name* with characters that are illegal in file names removed.

    Leading and trailing whitespace is stripped as well.
    """
    return _INVALID_NAME_CHARS.sub('', name).strip()


def build_file_name(title, img_src):
    """Build the local file name for an image.

    The name is the (sanitized) link title followed by the last path
    segment of *img_src*, mirroring the original ``title + file_name``
    scheme.
    """
    base_name = img_src.rsplit('/', maxsplit=1)[-1]
    return sanitize_filename(title) + sanitize_filename(base_name)


def fetch(url):
    """GET *url* with a timeout and raise ``requests.HTTPError`` on 4xx/5xx."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def download_gallery(page_url, title):
    """Download every image inside the ``main-image`` div of *page_url*.

    Each image is written to ``SAVE_DIR`` and its path is printed. A failed
    image download is reported and skipped so the rest still get saved.
    """
    soup = BeautifulSoup(fetch(page_url).text, 'html.parser')
    container = soup.find(name='div', attrs={'class': 'main-image'})
    if container is None:
        print('skip (no main-image block): ' + page_url)
        return

    for img in container.find_all('img'):
        src = img.attrs.get('src')
        if not src:
            continue
        # urljoin copes with absolute URLs and paths lacking a leading slash.
        img_src = urljoin(BASE_URL, src)
        try:
            content = fetch(img_src).content
        except requests.RequestException as exc:
            print('skip (download failed): {} ({})'.format(img_src, exc))
            continue
        file_path = os.path.join(SAVE_DIR, build_file_name(title, img_src))
        with open(file_path, 'wb') as f:
            f.write(content)
        print(file_path)


def main():
    """Crawl the first list page and save the images of every linked gallery."""
    os.makedirs(SAVE_DIR, exist_ok=True)

    response = fetch(LIST_URL)
    response.encoding = 'utf8'
    soup = BeautifulSoup(response.text, 'html.parser')
    ul = soup.find(name="ul", attrs={'id': 'pins'})
    if ul is None:
        raise SystemExit('list container <ul id="pins"> not found at ' + LIST_URL)

    for a in ul.find_all(name='a'):
        # Anchors that wrap an <img> are skipped; only text anchors are
        # followed, and their text is used as the file-name prefix.
        # NOTE(review): presumably each entry has both a thumbnail link and
        # a text link to the same page - confirm against the live markup.
        if a.find('img') is not None:
            continue
        href = a.attrs.get('href')
        if not href:
            continue
        page_url = urljoin(BASE_URL, href)
        try:
            download_gallery(page_url, a.text)
        except requests.RequestException as exc:
            print('skip (page failed): {} ({})'.format(page_url, exc))


if __name__ == "__main__":
    main()

  

猜你喜欢

转载自www.cnblogs.com/0bug/p/11665807.html