from lxml import etree

爬取结构:

        

            res = requests.get(url,headers = headers)

            selector =  etree.HTML(res.text)

            #抓大标签

            url_infos = selector.xpath('xpath路径')

             在抓到的大标签元素上继续用相对路径取值,例如 .xpath('td/div/a/@title')[0] 是正确的写法

               也就是把完整 xpath 路径中 td 前面的部分全部删除掉(那一部分应当已经写在上面抓大标签的 xpath 里)

.xpath('string(.)').strip():取出当前节点及其所有后代节点的文本内容(拼接成一个字符串),并去掉首尾空白,详见:

                https://www.cnblogs.com/thunderLL/p/8038927.html

猜你喜欢

转载自blog.csdn.net/zjkpy_5/article/details/81041815