Python爬取腾讯漫画《一人之下》

直接上代码吧

import requests
from lxml import etree
from selenium import webdriver
import time
import os
import re
import urllib.request

# Running total of images saved so far across all chapters; incremented by downImg().
NUMBER=0
def getImg(browser,page=1):
    """Scrape and download all images of the comic page with cid ``page``.

    The page is opened in ``browser``, scrolled so that lazily loaded images
    are present in the DOM, and every image URL found in the
    ``comic-contain`` list is handed to ``downImg``.

    Args:
        browser: Selenium WebDriver instance used to load and scroll the page.
        page: The ``cid`` value placed in the comic URL (defaults to 1).

    Returns:
        None. Error pages are skipped. Any exception raised while processing
        the page is caught and reported on stdout so the caller's loop can
        continue with the next cid.
    """
    try:
        url = 'https://ac.qq.com/ComicView/index/id/531490/cid/{}'.format(page)

        # isErrerView() navigates the browser to `url` itself, so the page is
        # not loaded a second time here.
        status = isErrerView(browser, url)
        if status == 404:
            # Skip error pages and stop here. Previously the function recursed
            # into the next cid and then fell through, scraping the page a
            # second time with a mismatched chapter number.
            print("cid "+str(page)+" 是错误页，跳过")
            return

        # The chapter number shown in the page title names the output folder.
        chapter = int(status)
        # makedirs(exist_ok=True) lets a re-run reuse an existing folder and
        # also creates missing parent directories.
        os.makedirs("E:/Temp/yrzx/第"+str(chapter)+"章", exist_ok=True)

        # Scroll through the reader so the image elements receive their src.
        execDef(browser)
        html = etree.HTML(browser.page_source)
        # Collect the src of every <img> inside the comic-contain list items.
        result = html.xpath('//ul[@class="comic-contain"]/li/img/@src')
        print("第"+str(chapter)+"章一共有"+str(len(result))+"张图")
        downImg(chapter,result)
    except Exception as e:
        # Per-page boundary: report what went wrong instead of hiding it.
        print("--抛出异常--", repr(e))

def isErrerView(browser,url):
    """Load ``url`` in ``browser`` and classify the page by its title.

    Args:
        browser: Object exposing ``get(url)`` and a ``title`` attribute
            (a Selenium WebDriver in this script).
        url: Address of the comic page to open.

    Returns:
        The integer 404 when the title contains "错误提示"; otherwise the
        first run of digits found in the title, as a string.

    Raises:
        IndexError: If the title is not an error title and has no digits.
    """
    browser.get(url)
    page_title = browser.title

    # Error pages are recognised purely by their title text.
    if "错误提示" in page_title:
        return 404

    # Otherwise hand back the first number in the title (kept as a string).
    digit_runs = re.findall(r"\d+", page_title)
    return digit_runs[0]

def execDef(driver, steps=20, step_px=1500, pause=0.5):
    """Scroll the ``mainView`` element down in fixed steps.

    Before each step the function sleeps for ``pause`` seconds, then sets
    ``scrollTop`` of the element with id ``mainView`` to ``i * step_px`` for
    ``i`` in ``1..steps``. The defaults reproduce the previous hard-coded
    behaviour (20 steps of 1500 px, 0.5 s apart).

    Args:
        driver: Object exposing ``execute_script(js)`` (a Selenium WebDriver
            in this script).
        steps: Number of scroll steps to perform; 0 performs none.
        step_px: Pixels added to the scroll offset at each step.
        pause: Seconds to wait before each step.
    """
    for i in range(1, steps + 1):
        time.sleep(pause)
        js = 'var q=document.getElementById("mainView").scrollTop=' + str(i * step_px)
        driver.execute_script(js)

def downImg(page,list):
    """Download each image URL into the folder of chapter ``page``.

    Files are written to ``E:/Temp/yrzx/第<page>章/<position>.jpg`` where
    ``position`` is the 1-based index of the URL in ``list``. The module-level
    ``NUMBER`` counter is increased once per image that was actually saved.

    Args:
        page: Chapter number used in the folder name and progress messages.
        list: Sequence of image URLs (the name shadows the builtin but is kept
            so the signature stays unchanged).

    Returns:
        None. A URL that cannot be fetched is reported and skipped so the
        remaining images of the chapter are still downloaded.
    """
    global NUMBER
    if not list:
        return

    root = 'E:/Temp/yrzx/第'+str(page)+'章'
    # Make sure the target folder exists even when called on its own.
    os.makedirs(root, exist_ok=True)

    for j, x in enumerate(list, start=1):
        try:
            # A timeout keeps one stalled connection from hanging the run;
            # raise_for_status() avoids saving an HTTP error body as a .jpg.
            r = requests.get(x, timeout=30)
            r.raise_for_status()
        except requests.RequestException as e:
            print("下载失败=="+str(page)+"-"+str(j)+": "+str(e))
            continue

        # The with-statement closes the file; no explicit close() is needed.
        with open(root+'/'+str(j)+'.jpg', 'wb') as f:
            f.write(r.content)
        print("文件保存成功=="+str(page)+"-"+str(j))
        NUMBER+=1
        print("累计下载-"+str(NUMBER))

def main():
    """Ask for a cid range and download every page in it with one browser.

    The range is read from stdin as ``START-END`` and iterated with
    ``range(START, END)``, so ``END`` itself is not visited.

    Raises:
        ValueError: If the input is not two integers separated by ``-``.
    """
    a, b = map(int, input("请输入章节范围:").split('-'))
    browser = webdriver.Chrome()
    try:
        for x in range(a,b):
            getImg(browser,x)
    finally:
        # quit() ends the whole driver session; the finally block guarantees
        # this also happens when the loop is interrupted.
        browser.quit()

# Start the downloader only when this file is executed as a script, not when it is imported.
if __name__=="__main__":
    main()
发布了21 篇原创文章 · 获赞 2 · 访问量 6492

猜你喜欢

转载自blog.csdn.net/weixin_43386443/article/details/99325616