代码(使用 XPath 筛选):
```python
import requests
from lxml import etree
# Vocabulary list: shanbei() appends one dict per scraped table row here,
# using the keys 'name' (the word) and 'content' (its explanation).
words = []
def _element_text(element):
    """Return the stripped leading text of an lxml element, or '' if none."""
    # lxml reports .text as None when an element has no text before its
    # first child, so guard before calling strip().
    return (element.text or '').strip()


def shanbei(page):
    """Scrape one page of the Shanbay word list into the global ``words``.

    Downloads the requested page of word list 104899/202159, walks every
    ``<tr>`` in the returned HTML and, for each row, collects the word (the
    first ``<strong>`` found anywhere inside the row) and its explanation
    (the first direct ``<td>`` child with class ``span10``). Rows that
    yield neither are skipped.

    Args:
        page: Page number substituted into the ``page`` query parameter.

    Returns:
        None. Each parsed row is appended to the module-level ``words``
        list as a dict that may contain the keys ``'name'`` and
        ``'content'``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = 'https://www.shanbay.com/wordlist/104899/202159/?page=%s' % page
    print(url)
    # A timeout keeps an unresponsive server from blocking the script forever.
    rsp = requests.get(url, timeout=10)
    # Response.text is a property, not a method: calling it raised
    # "TypeError: 'str' object is not callable" in the original code.
    tree = etree.HTML(rsp.text)
    # Visit every table row in the document.
    for tr in tree.xpath('//tr'):
        word = {}
        # The word itself is rendered inside a <strong> tag.
        strong = tr.xpath('.//strong')
        if strong:
            word['name'] = _element_text(strong[0])
        # The explanation lives in the row's <td class="span10"> cell.
        td_content = tr.xpath('./td[@class="span10"]')
        if td_content:
            word['content'] = _element_text(td_content[0])
        # Keep only rows that produced at least one field.
        if word:
            words.append(word)
if __name__ == '__main__':
    # Scrape the first page of the list; change the page number as needed.
    shanbei(page=1)
    # Show everything that was collected.
    print(words)
```