Caso práctico: rastreo (scraping) del sitio Xueqiu (xueqiu.com, «bola de nieve»)

1. Primero, encapsulamos en una clase el código que inserta datos en la base de datos, para poder llamarlo cómodamente más adelante

import pymysql

class mysql_conn(object):
    """Thin wrapper around one PyMySQL connection and one cursor.

    The connection is opened in ``__init__`` and released in ``__del__``.
    Every statement run through :meth:`execute__mysql` is committed
    immediately.
    """

    def __init__(self):
        """Open the connection and create a cursor on it."""
        # Pre-set both attributes so __del__ stays safe if connect() raises.
        self.db = None
        self.cursor = None
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional ones.
        self.db = pymysql.connect(
            host="localhost",
            user="root",
            password="******",
            database="xueqiu__text",
        )
        # Cursor used for every statement executed through this object.
        self.cursor = self.db.cursor()

    def execute__mysql(self, k, args=None):
        """Execute one SQL statement and commit it.

        Args:
            k: SQL text. May contain ``%s`` placeholders when ``args`` is given.
            args: Optional parameters bound to the placeholders by the driver.
                Prefer this over formatting values into ``k`` by hand, which
                breaks on quotes and allows SQL injection.

        Raises:
            Exception: whatever the driver raises; the transaction is rolled
                back before the error is re-raised.
        """
        try:
            self.cursor.execute(k, args)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def __del__(self):
        """Release the cursor and the connection if they were ever created."""
        # getattr guards against instances whose __init__ never ran or failed.
        for resource in (getattr(self, "cursor", None), getattr(self, "db", None)):
            if resource is not None:
                resource.close()

if __name__ == '__main__':
    # Manual smoke test: open a connection and insert one hard-coded row
    # into the `zhang` table.
    mc = mysql_conn()
    sql = "insert into zhang(uid,title,target,description) values (1,'df','ds','dds')"
    mc.execute__mysql(sql)

2. Análisis de la URL

#1
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=-1&count=10&category=111
# next_id:184263
# next_max_id:184275
# tip:null
#
# #2
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184275&count=15&category=111
# next_id:184082
# next_max_id:184086
# tip:null
#
#
# #3
# https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id=184086&count=15&category=111
# next_id:183682
# next_max_id:183687
# tip:null

 

3. Entramos en materia: rastrear los datos

import requests
import json
# 调用上面封装好的执行数据库添加
from MySQL__text import mysql_conn




# Crawl the xueqiu.com public timeline and store each entry in MySQL.
#
# 1. Build the URL for each AJAX page.
# 2. Pull the fields of interest out of every entry on the page.
#
# The first request uses max_id=-1 and count=10; every later request passes
# the previous response's `next_max_id` and asks for 15 entries.

# Request headers are identical for every page, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
    'Cookie': 'aliyungf_tc=AQAAANYIWD45fAgAUhVFedAJ1g52dN1J; xq_a_token=584d0cf8d5a5a9809761f2244d8d272bac729ed4; xq_a_token.sig=x0gT9jm6qnwd-ddLu66T3A8KiVA; xq_r_token=98f278457fc4e1e5eb0846e36a7296e642b8138a; xq_r_token.sig=2Uxv_DgYTcCjz7qx4j570JpNHIs; _ga=GA1.2.1201105619.1534335404; _gid=GA1.2.839495955.1534335404; u=711534335406418; device_id=11784be644def4e388466a52197bbf16; Hm_lvt_1db88642e346389874251b5a1eded6e3=1534335407,1534335456,1534340424,1534340452; Hm_lpvt_1db88642e346389874251b5a1eded6e3=1534341631'

}

# Placeholders are bound by the driver, so quotes or other special characters
# in the scraped text can neither break the statement nor inject SQL.
# NOTE: a missing (None) field is now stored as NULL instead of the literal
# text 'None' that string formatting used to produce.
sql = "insert into zhang(uid,title,target,description) values (%s,%s,%s,%s)"

# One connection for the whole crawl instead of a new one per row.
mc1 = mysql_conn()

max_id = -1
for i in range(1, 16):  # i is the 1-based index of the AJAX request
    count = 10 if i == 1 else 15

    url = 'https://xueqiu.com/v4/statuses/public_timeline_by_category.json?since_id=-1&max_id={}&count={}&category=111'.format(max_id, count)

    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error rather than on a confusing JSON/KeyError.
    response.raise_for_status()

    # json.loads needs text; response.text is the decoded body.
    res__dict = json.loads(response.text)

    # Cursor for the next page.
    max_id = res__dict['next_max_id']

    # Iterate over what was actually returned: a short final page would make
    # indexing with range(count) raise IndexError.
    for item in res__dict['list']:
        # Each entry carries its payload as a JSON-encoded string.
        data = json.loads(item['data'])

        uid = data['id']
        title = data['title']
        target = data['target']
        description = data['description']
        print(description)

        mc1.cursor.execute(sql, (uid, title, target, description))
        mc1.db.commit()

 

Supongo que te gusta

Origin blog.csdn.net/zhang_8626/article/details/81713958
Recomendado
Clasificación