Python爬虫requests之扇贝单词

代码如下,使用 XPath 筛选单词及其释义:

import requests
from lxml import etree
# Vocabulary list: module-level accumulator that shanbei() appends to,
# one dict per table row that yielded a word and/or its content.
words = []

def shanbei(page):
    """Scrape one page of a Shanbay word list into the module-level ``words``.

    Downloads the word-list page, walks every ``<tr>`` element and, for each
    row, collects the text of the first ``<strong>`` descendant under the key
    ``'name'`` and the text of the first ``<td class="span10">`` child under
    the key ``'content'``. Rows that yield neither key are skipped.

    Args:
        page: Page number substituted into the ``page`` query parameter.

    Returns:
        None. Results are appended to the global ``words`` list.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://www.shanbay.com/wordlist/104899/202159/?page=%s' % page
    print(url)

    # Without a timeout a stalled connection would block forever.
    rsp = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    rsp.raise_for_status()
    # ``Response.text`` is a property; calling it (``rsp.text()``) raises
    # TypeError because the returned str is not callable.
    html = etree.HTML(rsp.text)

    # Look at every table row in the document.
    for tr in html.xpath('//tr'):
        word = {}

        # The word itself.
        strong = tr.xpath('.//strong')
        if strong:
            # lxml's ``.text`` is None when the element has no leading text,
            # so fall back to an empty string before stripping.
            word['name'] = (strong[0].text or '').strip()

        # The content cell for the word.
        td_content = tr.xpath('./td[@class="span10"]')
        if td_content:
            word['content'] = (td_content[0].text or '').strip()

        if word:
            words.append(word)

if __name__ == '__main__':
    # Scrape the first page; pass a different page number to fetch another.
    shanbei(page=1)
    print(words)
    ```

猜你喜欢

转载自blog.csdn.net/qq_31235811/article/details/88771174