今日练习Python基础教程第23章项目4:新闻汇总
运行顺利,代码如下:
import html
import re
import textwrap
from nntplib import NNTP, decode_header
from urllib.request import urlopen
class NewsAgent:
    """Collect news items from registered sources and pass them to destinations.

    A source is any object with a ``get_items()`` method that returns an
    iterable of items; a destination is any object with a
    ``receive_items(items)`` method.
    """

    def __init__(self):
        self.sources = []
        self.destinations = []

    def add_source(self, source):
        """Register *source* as a provider of news items."""
        self.sources.append(source)

    def add_destination(self, dest):
        """Register *dest* as a receiver of news items."""
        self.destinations.append(dest)

    def addDestination(self, dest):
        """Backward-compatible camelCase alias for ``add_destination``."""
        self.add_destination(dest)

    def distribute(self):
        """Fetch all items from all sources and publish them to every destination.

        The items are gathered into one list first, so each destination
        receives the complete set even when a source is a one-shot generator.
        """
        items = []
        for source in self.sources:
            items.extend(source.get_items())
        for dest in self.destinations:
            dest.receive_items(items)
class NewsItem:
    """A simple news item made up of a title and a body."""

    def __init__(self, title, body):
        # Both values are stored as given; no validation or conversion.
        self.title, self.body = title, body
class NNTPSource:
    """News source that reads the most recent articles of an NNTP newsgroup.

    Note: this relies on the standard-library ``nntplib`` module, which was
    removed in Python 3.13.
    """

    def __init__(self, servername, group, howmany):
        self.servername = servername
        self.group = group
        self.howmany = howmany

    def get_items(self):
        """Yield a ``NewsItem`` for each of the last ``howmany`` articles.

        The connection is opened when iteration starts and is closed when the
        generator finishes, is closed early, or fails part-way through.
        """
        # The context manager closes the connection on exit; a trailing
        # ``server.quit()`` would only run if the generator were exhausted.
        with NNTP(self.servername) as server:
            _resp, _count, first, last, _name = server.group(self.group)
            # Never request article numbers below the group's first article.
            start = max(first, last - self.howmany + 1)
            _resp, overviews = server.over((start, last))
            for article_number, overview in overviews:
                title = decode_header(overview['subject'])
                _resp, info = server.body(article_number)
                # Body lines arrive as bytes; latin-1 can decode any byte value.
                body = '\n'.join(
                    line.decode('latin1') for line in info.lines
                ) + '\n\n'
                yield NewsItem(title, body)
class SimpleWebSource:
    """News source that extracts items from a web page with regular expressions.

    ``title_pattern`` and ``body_pattern`` are regex strings; their matches
    are paired up in order of appearance to form the news items.
    """

    def __init__(self, url, title_pattern, body_pattern, encoding='utf8'):
        self.url = url
        self.title_pattern = re.compile(title_pattern)
        self.body_pattern = re.compile(body_pattern)
        # Previously stored under the misspelled name ``endoding``, which made
        # get_items() fail with AttributeError when it read ``self.encoding``.
        self.encoding = encoding

    def get_items(self):
        """Download the page and yield one ``NewsItem`` per (title, body) pair."""
        # Close the response as soon as the payload has been read.
        with urlopen(self.url) as response:
            text = response.read().decode(self.encoding)
        titles = self.title_pattern.findall(text)
        bodies = self.body_pattern.findall(text)
        # zip() stops at the shorter list, so unpaired matches are dropped.
        for title, body in zip(titles, bodies):
            # textwrap.fill returns a single wrapped string; textwrap.wrap
            # returns a list of lines and could not be concatenated with '\n'.
            yield NewsItem(title, textwrap.fill(body) + '\n')
class PlainDestination:
    """News destination that prints every item to standard output as plain text."""

    def receive_items(self, items):
        """Print each item's title, a dashed underline of equal length, and its body."""
        for entry in items:
            heading = entry.title
            print(heading)
            # Underline is exactly as wide as the title.
            print('-' * len(heading))
            print(entry.body)
class HTMLDestination:
    """News destination that writes all items to a single HTML file.

    The page contains a linked table of contents followed by one section
    (heading plus preformatted body) per item.
    """

    # Page prologue and epilogue; print() appends one more newline to each.
    _HEADER = (
        "\n"
        "<html>\n"
        "<head>\n"
        "<title>Today's News</title>\n"
        "</head>\n"
        "<body>\n"
        "<h1>Today's News</h1>\n"
    )
    _FOOTER = "\n</body>\n</html>\n"

    def __init__(self, filename):
        self.filename = filename

    def receive_items(self, items):
        """Write *items* to ``self.filename`` as an HTML page.

        Any existing file of that name is overwritten. Titles and bodies are
        HTML-escaped so that characters such as ``<`` and ``&`` in news text
        cannot break or inject markup.
        """
        # The items are walked twice (index, then sections), so a one-shot
        # iterable such as a generator must be materialized first.
        items = list(items)
        # ``with`` guarantees the file is flushed and closed, even on error.
        with open(self.filename, 'w') as out:
            print(self._HEADER, file=out)
            print('<ul>', file=out)
            for number, item in enumerate(items, start=1):
                title = html.escape(str(item.title))
                print(' <li><a href="#{}">{}</a></li>'.format(number, title),
                      file=out)
            print('</ul>', file=out)
            # Anchor names reuse the 1-based numbers used by the index links.
            for number, item in enumerate(items, start=1):
                title = html.escape(str(item.title))
                body = html.escape(str(item.body))
                print('<h2><a name="{}">{}</a></h2>'.format(number, title),
                      file=out)
                print('<pre>{}</pre>'.format(body), file=out)
            print(self._FOOTER, file=out)
def runDefaultSetup():
    """Run a news agent with a default source and default destinations.

    Edit the server, group, article count, and output file below to suit
    your own setup.
    """
    # NNTP source for the comp.lang.python.announce group. Replace the host
    # with a real, reachable NNTP server; another group such as
    # comp.lang.python can be substituted as well.
    announce_source = NNTPSource(
        'web.aioe.org',
        'comp.lang.python.announce',
        10,
    )

    agent = NewsAgent()
    agent.add_source(announce_source)

    # Register a plain-text destination and an HTML file destination.
    for destination in (PlainDestination(), HTMLDestination('news3.html')):
        agent.addDestination(destination)

    # Fetch from the source and publish to both destinations.
    agent.distribute()
# Run the default setup only when this file is executed as a script.
if __name__ == '__main__':
    runDefaultSetup()