腾讯视频爬取评论

# Scrape comments from Tencent Video
import json
import re
import urllib.error
import urllib.request
# urlopen() verifies the server's SSL certificate when it opens an https link;
# the assignment below swaps in the unverified context factory to cancel that
# verification globally.
# NOTE(review): this removes certificate checking for https requests that rely
# on the default context, so responses can be tampered with in transit --
# confirm this is acceptable before reusing the script elsewhere.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Browser-like User-Agent header attached to the opener below.
headers = ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0')
opener = urllib.request.build_opener()
opener.addheaders = [headers]
urllib.request.install_opener(opener)  # make this opener the default for urlopen()

# Cursor used for the first page request.
comid = '6365504907156083559'

# The comment endpoint URL is split around the cursor value so that every page
# request is built from the same two pieces.
URL_PREFIX = 'https://video.coral.qq.com/varticle/1909645650/comment/v2?callback=_varticle1909645650commentv2&orinum=10&oriorder=o&pageflag=1&cursor='
URL_SUFFIX = '&scorecursor=0&orirepnum=2&reporder=o&reppageflag=1&source=9&_=1523451466201'
url = URL_PREFIX + comid + URL_SUFFIX

PAGE_COUNT = 2        # number of pages to request
REQUEST_TIMEOUT = 10  # seconds to wait on the network before giving up

# Compiled once; both patterns are applied to the raw response text.
LAST_PATTERN = re.compile(r'"last":"(.*?)"')
CONTENT_PATTERN = re.compile(r'"content":"(.*?)",')


def decode_comment(raw):
    """Turn the escaped text captured from a response into readable text.

    The captured text is treated as the inside of a JSON string literal, so
    ``\\uXXXX`` escapes (including surrogate pairs) are decoded without ever
    executing the text as code.

    Args:
        raw: Text captured between the quotes of a ``"content"`` field.

    Returns:
        The decoded string, or ``raw`` unchanged when it is not a valid JSON
        string body (for example when the capture was cut short).
    """
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        return raw


def parse_page(data):
    """Extract the next cursor and the comment texts from one response.

    Args:
        data: Decoded response text.

    Returns:
        A ``(next_cursor, comments)`` tuple. ``next_cursor`` is the first
        ``"last"`` value found, or ``None`` when the response has none;
        ``comments`` is the list of decoded ``"content"`` values in order.
    """
    last = LAST_PATTERN.search(data)
    next_cursor = last.group(1) if last else None
    comments = [decode_comment(raw) for raw in CONTENT_PATTERN.findall(data)]
    return next_cursor, comments


def main():
    """Fetch PAGE_COUNT comment pages and print every comment found.

    A failed request is reported and the same URL is tried again on the next
    iteration; paging stops early when a response carries no next cursor.
    """
    page_url = url
    for i in range(PAGE_COUNT):
        try:
            # The with-block closes the HTTP response even if read() fails.
            with urllib.request.urlopen(page_url, timeout=REQUEST_TIMEOUT) as response:
                data = response.read().decode()
        except urllib.error.URLError as e:
            if hasattr(e, 'code'):  # only HTTP errors carry a status code
                print(e.code)
            if hasattr(e, 'reason'):
                print(e.reason)
            continue
        except OSError as e:
            # Socket-level failures (e.g. a timeout while reading the body)
            # are not always wrapped in URLError.
            print(e)
            continue

        next_cursor, comments = parse_page(data)
        for j, comment in enumerate(comments):
            print('.....第' + str(i) + str(j) + '条评论内容是:')
            print(comment)

        if next_cursor is None:
            # Without a cursor there is no further page to ask for.
            break
        page_url = URL_PREFIX + next_cursor + URL_SUFFIX


if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/weixin_41988628/article/details/79919556