# Python crawler: fetch specified content from the Wenzhou Ouhai District news site (ohnews.cn).

import requests
from bs4 import BeautifulSoup


def get_html(url, timeout=10):
    """Download a page and return its HTML source as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled server.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        # Identify as a mobile Chrome browser (Nexus 5 / Android 6.0).
        'User-Agent': (
            'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/70.0.3538.25 Mobile Safari/537.36'
        ),
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error pages instead of handing their HTML to the parser.
    response.raise_for_status()
    # When the server declares no charset, requests falls back to ISO-8859-1,
    # which garbles Chinese text; use the encoding detected from the body.
    if response.encoding is None or response.encoding.lower() == 'iso-8859-1':
        response.encoding = response.apparent_encoding
    return response.text


print("瓯海区新闻网")
# Parse the news index page with the lxml parser.
soup = BeautifulSoup(get_html('http://www.ohnews.cn/news/oh/index.shtml'), 'lxml')
# Print every <li> that sits inside a <div class="list_e"> container.
# The inner search must start from the container itself: searching from
# `soup` would return every <li> in the whole document for each container.
for container in soup.find_all('div', class_='list_e'):
    for item in container.find_all('li'):
        print(item)

# (Blog page footer: "You may also like")
#
# Source: reposted from blog.csdn.net/fdsgfd43432/article/details/107740559
# (Blog page footer: "Recommended today")