Python 爬取微博粉丝列表(前 20 位)

import requests
from bs4 import BeautifulSoup
import json
import re

# NOTE(review): hard-coded session cookie copied from a browser. It is a
# credential and should not live in source control; it presumably expires
# (see the ALF field) -- confirm and load it from configuration instead.
headers = {"Cookie": "SINAGLOBAL=1340288961669.5112.1559828635483; UOR=,,www.baidu.com; Ugrow-G0=169004153682ef91866609488943c77f; login_sid_t=851912c52eb5bfb51baac61a5c95e5ac; cross_origin_proto=SSL; YF-V5-G0=8a1a69dc6ba21f1cd10b039dff0f4381; _s_tentry=passport.weibo.com; Apache=8952837854874.121.1560057315193; ULV=1560057315202:4:4:1:8952837854874.121.1560057315193:1559956552023; SSOLoginState=1560057387; YF-Page-G0=112e41ab9e0875e1b6850404cae8fa0e|1560519695|1560519693; SCF=AqS7E90doP65N8vbMSLd4kyOAtaeFwjmNEQlgZrEOg_A7qDzGoMkQI1mFPd9IIj1w28fCg0CzB-BStG3QOo4Vmc.; SUB=_2A25wB9RBDeRhGeBP4lUR9SnOyDmIHXVTdUKJrDV8PUJbmtBeLXTYkW9NRQ29hCuEgG2yOH2yx3LX7m_UqH9GwHDe; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhOsgAJRZjhUO8CY1cVR00u5JpX5K-hUgL.Foqp1KM7SKMEe0-2dJLoIpUZqgSaqbH8SCHWSb-RebH8SC-R1F-4x5tt; SUHB=0C1yiLJnMlt4wz; ALF=1592055685"}
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get('https://weibo.com/p/1004061195242865/follow?relate=fans&from=100406&wvr=6&mod=headfans&current=fans#place',headers=headers,timeout=10)
# Fail here with a clear HTTP error instead of a confusing JSON error below.
r.raise_for_status()
# The page is split on "</script>"; the fans list is taken from one of the
# last script chunks.
fansStr = r.text.split("</script>")
# [17:-1] drops a fixed-length prefix and the final character around the JSON
# payload (presumably the "<script>FM.view(" wrapper and its closing ")" --
# verify against a live response). Of the two candidate chunks the longer one
# is used; on equal length the earlier chunk (index -3) wins.
tmpJson = max((fansStr[-3][17:-1], fansStr[-2][17:-1]), key=len)
soup = BeautifulSoup(json.loads(tmpJson)['html'],'lxml')

class person(object):
    """One entry of the fans list, parsed from its ``<dl>`` tag.

    Attributes are assigned only when the matching markup exists, so
    ``__dict__`` contains exactly the fields found for this entry. ``name``
    and ``id`` are always extracted when a tag is given; ``followNumber``,
    ``fansNumber``, ``assay``, ``address``, ``introduce`` and ``fromInfo``
    appear only if their ``<div>`` is present.
    """

    def __init__(self, personTag = None):
        """Parse *personTag* if one is given.

        With the default ``None`` the instance is left empty; previously
        this raised ``AttributeError``.
        """
        if personTag is not None:
            self.analysis(personTag)

    def analysis(self,personTag):
        """Run every extraction step on *personTag*."""
        self.analysisName(personTag)
        self.analysisFollowAndFansNumber(personTag)
        self.analysisCity(personTag)
        self.analysisIntroduce(personTag)
        self.analysisFollowWay(personTag)
        self.analysisID(personTag)

    @staticmethod
    def _findDiv(personTag, className):
        """Return the last ``<div>`` whose class list is exactly ``[className]``.

        Returns ``None`` when no such div exists.
        """
        found = None
        for divTag in personTag.find_all('div'):
            # .get() instead of ['class']: a <div> without a class attribute
            # would otherwise raise KeyError and abort the whole parse.
            if divTag.get('class') == [className]:
                found = divTag
        return found

    def analysisName(self,personTag):
        """Store the text of the first link in the first ``<div>`` as ``name``."""
        self.name = personTag.div.a.string

    def analysisFollowAndFansNumber(self,personTag):
        """Store the three counters found in the ``info_connect`` div.

        The first, second and third ``<span>`` supply ``followNumber``,
        ``fansNumber`` and ``assay`` (presumably the post count -- confirm).
        """
        infoTag = self._findDiv(personTag, 'info_connect')
        if infoTag is not None:
            spans = infoTag.find_all('span')
            self.followNumber = spans[0].em.string
            self.fansNumber = spans[1].em.a.string
            self.assay = spans[2].em.a.string

    def analysisCity(self,personTag):
        """Store the ``info_add`` div's span text as ``address``."""
        addressTag = self._findDiv(personTag, 'info_add')
        if addressTag is not None:
            self.address = addressTag.span.string

    def analysisIntroduce(self,personTag):
        """Store the ``info_intro`` div's span text as ``introduce``."""
        introduceTag = self._findDiv(personTag, 'info_intro')
        if introduceTag is not None:
            self.introduce = introduceTag.span.string

    def analysisFollowWay(self,personTag):
        """Store the ``info_from`` div's link text as ``fromInfo``."""
        fromTag = self._findDiv(personTag, 'info_from')
        if fromTag is not None:
            self.fromInfo = fromTag.a.string

    def analysisID(self,personTag):
        """Build ``id`` from the profile link's ``href``.

        The id is the text after the first ``=`` (minus its last five
        characters) followed by the path segment between index 3 and ``?``.
        """
        personRel = personTag.dt.a['href']
        self.id = personRel[personRel.find('=')+1:-5]+personRel[3:personRel.find('?')]

# Find the container that holds the fans list: the last <div> whose class
# list is exactly ["follow_inner"]. Stays None when the page has no such div.
followTag = None
for divTag in soup.find_all('div'):
    # .get() instead of ['class']: a <div> without a class attribute would
    # otherwise raise KeyError before the container is ever reached.
    if divTag.get('class') == ["follow_inner"]:
        followTag = divTag

if __name__ == '__main__':
    if followTag is not None:
        # Each <dl> inside the container is parsed as one fan entry.
        for personTag in followTag.find_all('dl'):
            p = person(personTag)
            print(p.__dict__)

运行效果如下:
在这里插入图片描述

发布了3 篇原创文章 · 获赞 5 · 访问量 540

猜你喜欢

转载自blog.csdn.net/weixin_40618452/article/details/92217621