# NOTE: This forum's HTML source could not be scraped successfully, probably because of the site's anti-scraping measures; this will be addressed later. The code is as follows:
import json

import requests
from lxml import etree


class BtcSpider(object):
    """Collect post titles and links from a chainnode.com forum listing.

    The spider downloads a fixed range of listing pages, extracts every
    matching anchor's text and ``href``, accumulates them in
    ``self.data_list`` and finally dumps that list to disk as JSON.
    """

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": (
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/70.0.3538.77 Safari/537.36'
            )
        }
        # URL of the first listing page; later pages are derived from it.
        self.base_url = "https://www.chainnode.com/forum/2"
        # Accumulates one {'name': ..., 'url': ...} dict per post found.
        self.data_list = []

    # Fetch data
    def get_data(self, url):
        """Download *url* and return the response body decoded as text.

        Args:
            url: Absolute URL of the page to fetch.

        Returns:
            The response body decoded with ``bytes.decode()``'s default
            codec (UTF-8).
        """
        # A timeout keeps the script from hanging forever on a stalled
        # connection; requests applies no timeout by default.
        response = requests.get(url, headers=self.headers, timeout=10)
        return response.content.decode()

    # Parse data
    def parse_data(self, data):
        """Extract post titles and links from *data* into ``self.data_list``.

        Args:
            data: HTML text of one listing page.
        """
        # Convert the raw text into an element tree that supports XPath.
        x_data = etree.HTML(data)

        # NOTE(review): the class value below is a best-effort
        # reconstruction -- it was garbled in the text this script was
        # recovered from. Verify it against the live page markup.
        anchor = '//a[@class="link-dark-major font-bold bbt-block"]'
        title_list = x_data.xpath(anchor + '/text()')
        url_list = x_data.xpath(anchor + '/@href')

        # The hrefs are joined onto the site root to form absolute URLs.
        url_list = ["https://www.chainnode.com" + href for href in url_list]

        # zip() pairs each title with its link and stops at the shorter
        # list, so a length mismatch cannot raise IndexError.
        for title, link in zip(title_list, url_list):
            self.data_list.append({'name': title, 'url': link})

    # Save data
    def save_data(self):
        """Write ``self.data_list`` as JSON text to ``03-btc.html``.

        The file is opened in ``'w'`` mode, so any previous content is
        replaced. Despite the ``.html`` extension the content is JSON.
        """
        data_str = json.dumps(self.data_list)
        with open('03-btc.html', 'w', encoding='utf-8') as f:
            f.write(data_str)

    # Start
    def run(self):
        """Fetch and parse listing pages 1 to 4, then save the results."""
        for page in range(1, 5):
            # Build the full URL: page 1 is the bare base URL, later pages
            # get a "-<page>" suffix.
            # NOTE(review): the suffix format is reconstructed from a
            # garbled expression -- confirm against the site's pagination.
            url = self.base_url if page == 1 else self.base_url + "-" + str(page)

            # Send the request.
            data = self.get_data(url)

            # Parse the response.
            self.parse_data(data)

        # Saving once after the loop yields the same final file as saving
        # after every page, because the file is overwritten on each write.
        self.save_data()


if __name__ == "__main__":
    BtcSpider().run()