Python 爬取教务处通知

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/YangZuo_Chester/article/details/80036996

内容很简单,也没有什么难度,
在这里存一下代码。
(教务处主页更新后,这份代码有点毛病。)

from datetime import datetime
from urllib.request import urlopen
from bs4 import BeautifulSoup
import time

#函数
def getTitle():
    """Fetch the notice page and classify every notice not yet seen.

    Each ``<td height="22">`` cell that carries a ``title`` attribute is
    treated as one notice; its date is read from the grey
    ``<font color="#999999">`` element inside the cell.

    Side effects on module-level state:

    * A notice whose date differs from ``dateToday`` is printed
      immediately (date followed by title) and added to ``titleSet``.
    * A notice whose date equals ``dateToday`` is stored in
      ``todaysTitle`` and is NOT added to ``titleSet``; the caller is
      expected to print it and record it.

    NOTE: if several unseen notices carry today's date, each assignment
    overwrites the previous one, so only the last survives this call.
    The others stay out of ``titleSet`` and are found again on later calls.

    Raises:
        urllib.error.URLError (an ``OSError``) when the page cannot be
        fetched.
    """
    global todaysTitle

    # The context manager closes the HTTP response even if parsing raises,
    # so repeated polling does not leak connections.
    with urlopen("http://jwc.ahu.cn/main/index.asp") as response:
        bsObj = BeautifulSoup(response.read(), "lxml")

    for cell in bsObj.find_all("td", {"height": "22"}):
        notice = cell.attrs.get('title')
        if notice is None or notice in titleSet:
            continue

        dateTag = cell.find("font", {"color": "#999999"})
        if dateTag is None:
            # Without a date stamp the cell cannot be classified; skip it
            # instead of crashing on ``None.get_text()``.
            continue

        titleTime = dateTag.get_text()
        if titleTime == dateToday:
            todaysTitle = notice
        else:
            print(titleTime + notice)
            titleSet.add(notice)

# Titles that have already been reported, so every notice is printed once.
titleSet = set()
# Most recent unseen notice dated today; getTitle() rebinds it via `global`.
todaysTitle = ''
# Seed the set with the empty placeholder so that "no notice today" is
# never reported as a new title.
titleSet.add(todaysTitle)

# Build the date without zero padding (e.g. "(2018/4/5)") so it matches the
# format used on the registrar's page. Formatting the integer fields
# directly avoids removing padding by hard-coded string positions.
_now = datetime.now()
dateToday = '({}/{}/{})'.format(_now.year, _now.month, _now.day)


def _reportToday():
    """Print the pending notice for today once and mark it as seen."""
    if todaysTitle not in titleSet:
        titleSet.add(todaysTitle)
        print('Today:\n'+dateToday + todaysTitle)


# Initial load: prints every older notice and picks up today's, if any.
getTitle()
print('Load completely...waiting for news\n')
_reportToday()

hour = datetime.now().strftime('%H')
while True:
    # Sleep first: the page was fetched just above, so polling again
    # immediately would only repeat the same request.
    time.sleep(1800)

    # Print a line whenever the hour changes, as proof the program is
    # still running.
    newHour = datetime.now().strftime('%H')
    if hour != newHour:
        print(newHour+':00')
        hour = newHour

    try:
        getTitle()
    except OSError as err:
        # urllib's URLError/HTTPError and socket timeouts are OSError
        # subclasses. A transient network failure must not stop the
        # monitor, so report it and retry on the next cycle.
        print('Fetch failed, will retry: {}'.format(err))
    else:
        _reportToday()

猜你喜欢

转载自blog.csdn.net/YangZuo_Chester/article/details/80036996
今日推荐