python-web - download all xkcd comics

download all xkcd comics

 

# downloads every single xkcd comic

import requests,os,bs4
url = 'http://xkcd.com'  # start url (the newest comic)
os.makedirs('xkcd', exist_ok=True)  # store comics in ./xkcd

# Walk backwards through the archive by following each page's "prev" link.
# The loop stops once the prev link's href is '#', which makes the
# constructed url end with '#'.
while not url.endswith('#'):
    # Download the page. A timeout keeps a stalled connection from
    # hanging the script indefinitely.
    print('downloading page %s...'%url)
    res = requests.get(url, timeout=30)
    res.raise_for_status()

    # Name the parser explicitly so the parse does not depend on which
    # optional parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    # Find the URL of the comic image.
    comicElem = soup.select('#comic img')
    if not comicElem:
        print('could not find comic image')
    else:
        # The src attribute is prefixed with a scheme to form a full URL.
        comicUrl = 'http:' + comicElem[0].get('src')

        # Download the image.
        print('downloading image %s .... '%(comicUrl))
        res = requests.get(comicUrl, timeout=30)
        res.raise_for_status()

        # Save the image to ./xkcd; the context manager closes the file
        # even if a write fails part-way through.
        imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
        with open(imagePath, 'wb') as imageFile:
            for chunk in res.iter_content(100000):
                imageFile.write(chunk)

    # Get the prev button's URL. Stop cleanly instead of raising
    # IndexError/TypeError when the page has no usable prev link.
    prevLinks = soup.select('a[rel="prev"]')
    prevHref = prevLinks[0].get('href') if prevLinks else None
    if prevHref is None:
        print('could not find previous link')
        break
    url = 'http://xkcd.com' + prevHref

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325314841&siteId=291194637