python:知乎发现页的爬取(问题,作者,回答)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Yk_0311/article/details/82457509
import requests
from pyquery import PyQuery as pq

# Scrape the Zhihu "explore" page and append every feed entry
# (question, author, answer) to explore.txt in the working directory.
url = "https://www.zhihu.com/explore"
hd = {'User-Agent': 'Mozilla/5.0'}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=hd, timeout=10)
    r.raise_for_status()
except requests.RequestException as exc:
    # Without a usable response there is nothing to parse, so stop here
    # instead of falling through and failing later on an undefined `r`.
    print('=====')
    raise SystemExit('request to {} failed: {}'.format(url, exc))
# Decode the body with the encoding detected from its content.
r.encoding = r.apparent_encoding

doc = pq(r.text)
# Select the elements with class "feed-item" inside ".explore-tab".
items = doc('.explore-tab .feed-item').items()

entries = []
for item in items:
    question = item.find('h2').text()  # question title
    author = item.find('.author-link-line').text()  # author name
    # The inner markup of ".content" is parsed a second time to strip tags
    # (presumably the node stores the answer's HTML as text -- confirm).
    # html() returns None when the node is missing, so guard before re-parsing.
    content_html = item.find('.content').html()
    if content_html and content_html.strip():
        answer = pq(content_html).text()
    else:
        answer = ''
    entries.append(
        '\n'.join([question, author, answer]) + '\n' + '=' * 50 + '\n'
    )

# Open the output file once (append mode) rather than once per entry, and
# only when there is something to write.
if entries:
    with open("explore.txt", 'a', encoding='utf-8') as f:
        f.writelines(entries)

要点:本文主要演示 pyquery 解析库的使用。

猜你喜欢

转载自blog.csdn.net/Yk_0311/article/details/82457509