爬取知乎用户李大爷(leedaye)的回答(通过 Ajax 接口获取),并存入 MySQL

import requests
import mysqlhelper
import re
from urllib import request
import json
# Scrape the answers of the Zhihu member "leedaye" through the JSON answers
# API and store each answer's question title and excerpt in the ``zhihu``
# table.

# Helper object from the project-local ``mysqlhelper`` module.
# NOTE(review): presumably wraps a MySQL connection; ``fangfa`` is assumed to
# execute the statement with the given parameters -- confirm in mysqlhelper.
myhelper = mysqlhelper.MysqlHelper()
# Parameterized INSERT: the values are passed separately from the statement.
sql = 'INSERT INTO zhihu (title, excerpt) VALUES' \
      ' (%s, %s)'

PAGE_SIZE = 20        # answers requested per API call (the ``limit`` parameter)
PAGE_COUNT = 2        # number of consecutive pages to fetch
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever

# Answers endpoint; ``offset`` and ``limit`` are filled in per page.
url_template = 'https://www.zhihu.com/api/v4/members/leedaye/answers?include=data%5B*%5D.is_normal%2Cmark_infos%2Ccreated_time%2Cupdated_time%2Creview_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cvoting%2Cis_author%2Cis_thanked%2Cis_nothelp%3Bdata%5B*%5D.author.badge%5B%3F(type%3Dbest_answerer)%5D.topics&offset={offset}&limit={limit}&sort_by=created'

# The headers never change between requests, so build them once.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
}

for page in range(PAGE_COUNT):
    # Offsets start at 0 and advance by one page each time. The previous
    # ``20*(i-1)`` produced -20 for the first request and never reached the
    # second page.
    base_url = url_template.format(offset=PAGE_SIZE * page, limit=PAGE_SIZE)

    response = requests.get(base_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    list_data = response.json()

    answers = list_data['data']
    if not answers:
        # An empty page means there is nothing further to fetch.
        break

    for answer in answers:
        title = answer['question']['title']
        excerpt = answer['excerpt']
        print(title,' ',excerpt)
        zhihu_data = (title, excerpt)
        myhelper.fangfa(sql, zhihu_data)





猜你喜欢

转载自blog.csdn.net/weixin_42958164/article/details/81878735