用 Python 爬取 Codeforces 题目

版权声明:转载请标明出处 https://blog.csdn.net/kumu28/article/details/85268096

老 OJ SGU 已经搬到 Codeforces 上了,为了做题方便,我把题目爬取了下来。脚本不会把 HTML 文件转成 PDF,但用浏览器打开后按 Ctrl+P 打印即可。

题目的 HTML 沿用了老网站上的格式,排版不是很好,只能先这样了。

下面上代码(只能爬取 SGU 板块的题,其他的题 CSDN 上都有):

# -*- coding: utf-8 -*-

import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup

# Output file that collects the HTML of every scraped problem statement.
# The encoding is given explicitly because the default is locale dependent,
# and statement text may contain characters the locale encoding cannot
# represent.
fweb = open('an.html', 'w', encoding='utf-8')

def Find_an(soup,url):
    """Save one problem statement and its images to local files.

    Looks up the ``<div class="problemindexholder">`` element in the parsed
    page, downloads every ``<img>`` inside it into the current directory,
    rewrites each image ``src`` to the local file name, and appends the
    prettified HTML of that div to the module-level ``fweb`` file.

    Args:
        soup: Parsed problem page (the object returned by ``BeautifulSoup``).
        url: URL the page was fetched from. It is used as the base when
            resolving image ``src`` values; if empty, the Codeforces site
            root is used instead.

    Returns:
        1 if the statement div was found and written, 0 if the page has no
        such div.
    """
    # attrs has to be an attribute-name -> value mapping; a set literal
    # ({"class", ...}) was passed here before.
    holder = soup.find('div', {"class": 'problemindexholder'})
    if holder is None:
        return 0

    base_url = url or "http://codeforces.com"
    for img in holder.find_all('img'):
        src = img.get('src')
        if not src:
            continue  # nothing to download for an <img> without a source
        # urljoin handles site-relative, page-relative, protocol-relative
        # and already-absolute src values alike.
        image_url = urllib.parse.urljoin(base_url, src)
        # Take the name from the URL path only, so a query string or
        # fragment never ends up in the local file name.
        image_name = urllib.parse.urlsplit(image_url).path.split('/')[-1]
        if not image_name:
            continue
        # The context manager releases the streamed connection when done.
        with requests.get(image_url, stream=True, timeout=30) as r:
            with open('./%s' % image_name, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        img['src'] = image_name  # point the saved HTML at the local copy

    fweb.write(holder.prettify(formatter="html"))
    return 1

def main(problems=range(100,101)):
    """Fetch acmsguru problem pages from Codeforces and save their statements.

    For every problem number the page is downloaded, parsed with the
    ``lxml`` parser and handed to ``Find_an``; ``yes`` is printed when the
    statement was saved and ``no`` otherwise.

    Args:
        problems: Iterable of problem numbers (ints or strings) to fetch.
            Defaults to ``range(100, 101)``, i.e. only problem 100. To fetch
            numbers typed by the user, pass e.g. ``input().split()``.
    """
    Url = "http://codeforces.com/problemsets/acmsguru/problem/99999/"
    try:
        for i in problems:
            url = Url + str(i)
            print(url)
            # A timeout keeps an unresponsive server from hanging the run.
            html = requests.get(url, timeout=30).text
            soup = BeautifulSoup(html,'lxml')
            cnt = Find_an(soup,url)
            print('yes' if cnt == 1 else 'no')
    finally:
        # fweb is opened at module level and never explicitly closed, so
        # flush here to get the statements written so far onto disk even
        # when a request fails part-way through.
        fweb.flush()

# Run the scraper only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()

猜你喜欢

转载自blog.csdn.net/kumu28/article/details/85268096
今日推荐