第23章 新闻汇总

今日练习《Python基础教程》第23章的项目4:新闻汇总。
运行顺利,代码如下:

from nntplib import NNTP,decode_header
from urllib.request import urlopen
import textwrap
import re


class NewsAgent:
    """
    Hub that moves news items from registered sources to registered
    destinations.

    A source is any object with a ``get_items()`` method returning an
    iterable of items; a destination is any object with a
    ``receive_items(items)`` method.
    """

    def __init__(self):
        # Both registries start empty and are filled through the add* methods.
        self.sources, self.destinations = [], []

    def add_source(self, source):
        """Register ``source`` as a provider of news items."""
        self.sources.append(source)

    def addDestination(self, dest):
        """Register ``dest`` as a consumer of news items."""
        self.destinations.append(dest)

    def distribute(self):
        """
        Collect every item from every source, then pass the complete list
        to each destination in registration order.
        """
        # Sources are drained one after another, in registration order, into
        # a single list so that every destination sees the same items.
        collected = [
            item
            for source in self.sources
            for item in source.get_items()
        ]
        for dest in self.destinations:
            dest.receive_items(collected)

class NewsItem:
    """
    A simple news item consisting of a title and a body.
    """

    def __init__(self, title, body):
        # Plain attribute storage; no validation or conversion is performed.
        self.title, self.body = title, body

class NNTPSource:
    """
    News source that fetches the most recent articles of an NNTP newsgroup.
    """

    def __init__(self, servername, group, howmany):
        """
        Store the connection settings; no network access happens here.

        servername -- host name of the NNTP server
        group      -- name of the newsgroup to read
        howmany    -- number of most recent articles to fetch
        """
        self.servername = servername
        self.group = group
        self.howmany = howmany

    def get_items(self):
        """
        Yield one NewsItem per article, oldest first.

        This is a generator: the connection is opened when iteration starts
        and is closed when iteration finishes, fails, or is abandoned.
        """
        server = NNTP(self.servername)
        try:
            resp, count, first, last, name = server.group(self.group)
            # Never ask for article numbers below the group's first article,
            # which would happen when ``howmany`` exceeds the available range.
            start = max(first, last - self.howmany + 1)
            resp, overviews = server.over((start, last))
            for article_id, over in overviews:
                title = decode_header(over['subject'])
                resp, info = server.body(article_id)
                # latin1 maps every byte to a character, so decoding cannot
                # fail regardless of the article's real encoding.
                body = '\n'.join(
                    line.decode('latin1') for line in info.lines
                ) + '\n\n'
                yield NewsItem(title, body)
        finally:
            # Runs on normal completion, on errors, and when the consumer
            # stops early (generator close), so the connection is not leaked.
            server.quit()

class SimpleWebSource:
    """
    News source that extracts items from a web page with regular expressions.

    The page at ``url`` is downloaded and decoded with ``encoding``. Matches
    of ``title_pattern`` are paired, in order, with matches of
    ``body_pattern``.
    """

    def __init__(self, url, title_pattern, body_pattern, encoding='utf8'):
        """
        url           -- address of the page to download
        title_pattern -- regular expression matching item titles
        body_pattern  -- regular expression matching item bodies
        encoding      -- text encoding used to decode the downloaded page
        """
        self.url = url
        self.title_pattern = re.compile(title_pattern)
        self.body_pattern = re.compile(body_pattern)
        # Must be named ``encoding``: get_items() reads it under that name.
        self.encoding = encoding

    def get_items(self):
        """
        Yield one NewsItem per (title, body) pair found on the page.

        Each pattern is assumed to have at most one capture group, so that
        ``findall`` returns plain strings.
        """
        # The response is closed as soon as the page has been read.
        with urlopen(self.url) as response:
            text = response.read().decode(self.encoding)
        titles = self.title_pattern.findall(text)
        bodies = self.body_pattern.findall(text)
        # zip() stops at the shorter list; unpaired titles/bodies are dropped.
        for title, body in zip(titles, bodies):
            # textwrap.fill returns a single wrapped string (textwrap.wrap
            # would return a list of lines instead).
            yield NewsItem(title, textwrap.fill(body) + '\n')

class PlainDestination:
    """
    News destination that prints every item as plain text on standard output.
    """
    def receive_items(self, items):
        """Print each item's title, a dashed underline, and its body."""
        for entry in items:
            heading = entry.title
            # Underline is exactly as long as the title.
            underline = '-' * len(heading)
            print(heading, underline, entry.body, sep='\n')

class HTMLDestination:
    """
    News destination that writes all items to a single HTML file.

    The file starts with a linked index of titles, followed by one section
    per item with the body inside a ``<pre>`` element.
    """

    def __init__(self, filename):
        """filename -- path of the HTML file to (over)write."""
        self.filename = filename

    def receive_items(self, items):
        """
        Write ``items`` to ``self.filename`` as an HTML page.

        items -- iterable of objects with ``title`` and ``body`` attributes;
                 a one-shot iterator (e.g. a generator) is accepted.
        """
        # Local import so the block does not depend on file-level imports.
        from html import escape

        # The items are walked twice (index, then sections); materialise them
        # so a generator is not already exhausted on the second pass.
        items = list(items)

        # ``with`` guarantees the file is flushed and closed, even on error.
        with open(self.filename, 'w') as out:
            print("""
        <html>
          <head>
            <title>Today's News</title>
          </head>
          <body>
          <h1>Today's News</h1>
        """,file=out)

            # Index: one link per item, pointing at the matching anchor below.
            print('<ul>', file=out)
            for number, item in enumerate(items, start=1):
                print(' <li><a href="#{}">{}</a></li>'.format(
                    number, escape(item.title, quote=False)), file=out)
            print('</ul>', file=out)

            # Sections: titles and bodies come from external sources, so they
            # are escaped to keep '<', '>' and '&' from breaking the markup.
            for number, item in enumerate(items, start=1):
                print('<h2><a name="{}">{}</a></h2>'.format(
                    number, escape(item.title, quote=False)), file=out)
                print('<pre>{}</pre>'.format(
                    escape(item.body, quote=False)), file=out)

            print("""
          </body>
          </html>
          """,file=out)

def runDefaultSetup():
    """
    Build and run the default configuration of sources and destinations.

    Edit the settings below to point at a different server, group or
    output file.
    """
    # NNTP source settings. Replace the server name with a real, reachable
    # NNTP server. Alternative groups noted by the author include
    # comp.lang.python and gmane.comp.python.committers.
    server_name = 'web.aioe.org'
    group_name = 'comp.lang.python.announce'
    article_count = 10

    agent = NewsAgent()

    # One source: the newsgroup configured above.
    agent.add_source(NNTPSource(server_name, group_name, article_count))

    # Two destinations: plain text on stdout, and an HTML file.
    for destination in (PlainDestination(), HTMLDestination('news3.html')):
        agent.addDestination(destination)

    # Fetch everything and publish it.
    agent.distribute()

if __name__ == '__main__':
    # Run the default aggregation only when executed as a script.
    runDefaultSetup()

Supongo que te gusta

Origin blog.csdn.net/u012477117/article/details/106115668
Recomendado
Clasificación