Python 爬取微博转发以及转发后的点赞数、转发人信息

延续上一篇博客,这一篇用来爬取微博转发人的相关数据(转发人 id、昵称、转发时间、转发正文、点赞数)。数据分析没有太大难度,只要找到翻页规律即可:接口 URL 末尾的 page 参数从 1 开始逐页递增。不多说,直接贴代码。

# -*- coding:utf-8 -*-
__author__ = 'TengYu'
import requests
import xlwt
import  re
import json
import time

# HTTP headers sent with every request to the Weibo API.
# Replace both placeholder values with your own before running the script.
# (The key was previously misspelled 'User-agert', so the intended
# User-Agent header was never actually sent.)
headers = {
    'User-Agent': 'Your-agent',
    'Cookie': 'Your-Cookie',
}


# Utility class: strips unwanted links, tags and reply markers from the
# scraped post text.
class Tool:
    """Regex-based cleaner for the raw ``text`` field of a scraped post.

    All patterns are compiled once as class attributes; ``replace`` applies
    them in a fixed order and returns the stripped result.
    """

    # Any <img ...> tag.
    deleteImg = re.compile(r'<img.*?>')
    # Table-row / div open and close tags (removed, not turned into newlines).
    newLine = re.compile(r'<tr>|<div>|</tr>|</div>')
    # Everything from '//' up to the next ':'. Presumably aimed at the
    # '//@user:' prefix of a repost chain; note it also eats the
    # '//host/path...:' part of a URL inside an anchor tag.
    deleteAite = re.compile(r'//.*?:')
    # Whole <a>...</a> elements, plus the literal fragment "<a href='https:"
    # that is left behind when deleteAite has already consumed the rest of
    # an anchor.
    deleteAddr = re.compile(r"<a.*?>.*?</a>|<a href='https:")
    # Any remaining tag.
    deleteTag = re.compile(r'<.*?>')
    # "Reply to" markers (simplified and traditional Chinese). This must be a
    # unicode literal: on Python 2 a plain byte-string pattern containing
    # non-ASCII characters never matches unicode input.
    deleteWord = re.compile(u'回复@|回覆@|回覆|回复')

    @classmethod
    def replace(cls, x):
        """Return ``x`` with reply markers, images, links and tags removed.

        Args:
            x: The raw post text. ``None`` is treated as an empty string.

        Returns:
            The cleaned text with leading/trailing whitespace stripped.
        """
        if x is None:
            return u''
        # Order matters: deleteAite runs before deleteAddr, and deleteAddr's
        # second alternative cleans up the fragment deleteAite leaves behind.
        ordered_patterns = (
            cls.deleteWord,
            cls.deleteImg,
            cls.deleteAite,
            cls.deleteAddr,
            cls.newLine,
            cls.deleteTag,
        )
        for pattern in ordered_patterns:
            x = pattern.sub('', x)
        return x.strip()



class zhuanfa(object):
    """Scraper for the repost timeline of a single Weibo post."""

    def get_zhuanfa(self, weibo_id='4303334066243259', max_count=1000,
                    txt_path='filename.txt', xls_path='filename.xls'):
        """Download reposts page by page and save them to a text file and an .xls file.

        For every repost the cleaned text is appended as one line to
        ``txt_path``, and a row (user id, screen name, creation time, cleaned
        text, like count) is written to sheet ``sheet1`` of ``xls_path``.

        Paging stops when at least ``max_count`` rows have been collected,
        when a page contains no reposts, or after several consecutive
        request/parse failures. The original loop had no such exits and spun
        forever once the API stopped returning data.

        Args:
            weibo_id: Id of the post whose reposts are fetched.
            max_count: Row target. It is checked between pages, so the last
                page processed may push the total slightly above it. Keep it
                below 65535, the row limit of an .xls sheet.
            txt_path: Output path of the plain-text file (UTF-8).
            xls_path: Output path of the Excel workbook.

        Returns:
            The number of reposts written.
        """
        # Local import: io.open is an encoding-aware open() on both
        # Python 2 and Python 3.
        import io

        max_failures = 5      # consecutive failed pages tolerated before giving up
        request_timeout = 10  # seconds; without a timeout a stalled request hangs forever
        pause = 3             # seconds slept between pages

        base_url = ('https://m.weibo.cn/api/statuses/repostTimeline?id='
                    + str(weibo_id) + '&page=')

        excel = xlwt.Workbook(encoding='utf-8')
        sheet = excel.add_sheet('sheet1')
        for col, title in enumerate(('id', 'name', 'time', 'text', 'likes')):
            sheet.write(0, col, title)

        count = 0
        page = 0
        failures = 0
        try:
            with io.open(txt_path, 'w', encoding='utf-8') as out:
                while count < max_count:
                    page += 1
                    url = base_url + str(page)
                    print(url)
                    try:
                        response = requests.get(url, headers=headers,
                                                timeout=request_timeout)
                        resjson = json.loads(response.text)
                    except (requests.RequestException, ValueError) as e:
                        # Best effort: report the error and move on to the
                        # next page, but never retry without bound.
                        print(e)
                        failures += 1
                        if failures >= max_failures:
                            break
                        time.sleep(pause)
                        continue
                    failures = 0

                    payload = resjson.get('data') if isinstance(resjson, dict) else None
                    rows = payload.get('data') if isinstance(payload, dict) else None
                    if not rows:
                        # No reposts on this page: presumably the timeline is
                        # exhausted or the request was rejected. Either way,
                        # asking for further pages would loop forever.
                        break

                    for item in rows:
                        user = item.get('user') or {}
                        text = Tool.replace(item.get('text') or u'')
                        count += 1
                        # Text is written as unicode; pre-encoding it to UTF-8
                        # bytes is unnecessary and breaks under Python 3.
                        out.write(text + u'\n')
                        sheet.write(count, 0, user.get('id'))
                        sheet.write(count, 1, user.get('screen_name'))
                        sheet.write(count, 2, item.get('created_at'))
                        sheet.write(count, 3, text)
                        sheet.write(count, 4, item.get('attitudes_count'))
                    print("已经获取" + str(count) + "条数据")
                    time.sleep(pause)
        finally:
            # Save whatever was collected, even if the run was interrupted.
            excel.save(xls_path)
        return count

# Script entry point: run the repost scraper with its default settings.
if __name__ == '__main__':
    zhuanfa().get_zhuanfa()

尊重原作,转载请注明,转载自:https://blog.csdn.net/kr2563

猜你喜欢

转载自blog.csdn.net/kr2563/article/details/84581233