类装饰器爬取段子

import functools

import requests
from lxml import etree


class Request(object):
    """Class-based decorator that downloads a page and passes its text to a parser.

    Decorating a function replaces it with a ``Request`` instance. Calling that
    instance with a URL and headers issues an HTTP GET and hands the decoded
    response body to the wrapped function.
    """

    def __init__(self, func):
        """Remember the callable that will receive the page text.

        Args:
            func: Callable taking the response text as its single argument.
        """
        # Copy __name__, __doc__ and friends from func so the decorated object
        # still identifies itself as the original function.
        functools.update_wrapper(self, func)
        self.func = func

    def __call__(self, url, headers, data=None, timeout=10):
        """Fetch ``url`` and invoke the wrapped callable on the response text.

        Args:
            url: Address requested with HTTP GET.
            headers: HTTP headers sent with the request.
            data: Not used by this method; kept so existing callers that pass
                it keep working.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                stalled server cannot block the caller indefinitely.

        Returns:
            Whatever the wrapped callable returns for the page text.
        """
        resp = requests.get(url, headers=headers, timeout=timeout)
        # Decode with the encoding detected from the body itself.
        resp.encoding = resp.apparent_encoding
        content = resp.text
        return self.func(content)


@Request  # Same as get_content = Request(get_content); the instance is callable because of __call__.
def get_content(content):
    """Parse a listing page, print every complete entry, and return them.

    Args:
        content: HTML text of the page to parse.

    Returns:
        A list of dicts with the keys ``nick_name``, ``href`` and ``con``,
        one per ``div`` with class ``one-cont`` that has all three pieces.
        Empty or unparseable input yields an empty list.
    """
    if not content:
        # Nothing to parse.
        return []

    page = etree.HTML(content)
    if page is None:
        # The HTML parser may yield no root element for degenerate input.
        return []

    items = []
    for div in page.xpath('//div[@class="one-cont"]'):
        nick_names = div.xpath('.//i/text()')
        hrefs = div.xpath('.//a/@href')
        texts = div.xpath('.//p[@class="fonts"]/a/text()')
        if not (nick_names and hrefs and texts):
            # Skip entries missing a field instead of failing on [0] and
            # aborting the rest of the page.
            continue

        item = {
            "nick_name": nick_names[0],
            # hrefs are joined onto the site origin by plain concatenation.
            "href": "https://www.xiaohua.com" + hrefs[0],
            "con": texts[0],
        }
        print(item)
        items.append(item)
    return items
# Crawl listing pages 1 through 100, one get_content call per page.
# The headers never change between pages, so build them once outside the loop.
headers = {'User-Agent': "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"}
for num in range(1, 101):
    url = f"https://www.xiaohua.com/duanzi/?page={num}"
    get_content(url, headers=headers)
发布了127 篇原创文章 · 获赞 25 · 访问量 3万+

猜你喜欢

转载自blog.csdn.net/weixin_44224529/article/details/104235815