CSDN论坛之模型设计
一、CSDN论坛分析
论坛主题列表页:
详情页:
博主个人详情页:
根据以上页面,我们先分析需要抓取的数据,然后据此设计模型。
二、模型设计与数据表设计
from peewee import *
# Shared MySQL connection used by every model below.
# NOTE(review): credentials and host are hard-coded; port 3307 differs from
# MySQL's usual 3306 -- confirm it matches the local server setup.
db = MySQLDatabase(
    "py_spider",
    host="localhost",
    port=3307,
    user="root",
    password="root",
)
class BaseModel(Model):
    """Common base class that binds every derived model to ``db``."""

    class Meta:
        # All subclasses inherit this database binding.
        database = db
class Topic(BaseModel):
    """Forum topic record.

    Field declaration order is kept as-is because it determines the column
    order of the generated table.
    """

    # Title of the topic.
    title = CharField()
    # Body text of the topic; empty string when not provided.
    content = TextField(default="")
    # Topic id, used as the primary key (no auto-generated key is added).
    id = IntegerField(primary_key=True)
    # Author of the topic.
    author = CharField()
    # Creation time of the topic.
    create_time = DateTimeField()
    # Number of replies.
    answer_nums = IntegerField(default=0)
    # Number of views.
    click_nums = IntegerField(default=0)
    # Number of likes.
    parised_nums = IntegerField(default=0)
    # Thread-closing rate ("jie tie lv").
    jtl = FloatField(default=0.0)
    # Reward points offered.
    score = IntegerField(default=0)
    # Status of the topic.
    status = CharField()
class Answer(BaseModel):
    """Reply record.

    No explicit primary key is declared on this model.
    """

    # Plain integer column (not a foreign key); presumably holds the id of
    # the Topic this reply belongs to -- TODO confirm against the spider code.
    topic_id = IntegerField()
    # Author of the reply.
    author = CharField()
    # Body text of the reply; empty string when not provided.
    content = TextField(default="")
    # Creation time of the reply.
    create_time = DateTimeField()
    # Number of likes.
    parised_nums = IntegerField(default=0)
class Author(BaseModel):
    """Author (blogger) profile record.

    Field declaration order is kept as-is because it determines the column
    order of the generated table.
    """

    # Author name.
    name = CharField()
    # Author id, used as the primary key.
    id = IntegerField(primary_key=True)
    # Number of visits.
    click_nums = IntegerField(default=0)
    # Number of original posts.
    original_nums = IntegerField(default=0)
    # Number of reposts.
    forward_nums = IntegerField(default=0)
    # Ranking; defaults to -1 (presumably meaning "no ranking known" --
    # TODO confirm).
    rate = IntegerField(default=-1)
    # Number of comments.
    answer_nums = IntegerField(default=0)
    # Number of likes received.
    parised_nums = IntegerField(default=0)
    # Personal description / signature; may be NULL.
    desc = TextField(null=True)
    # Industry; may be NULL.
    industry = CharField(null=True)
    # Location / region; may be NULL.
    location = CharField(null=True)
    # Number of followers.
    follower_nums = IntegerField(default=0)
    # Number of accounts this author follows.
    following_nums = IntegerField(default=0)
if __name__ == "__main__":
    # When run as a script, create the tables backing the three models.
    models = [Topic, Answer, Author]
    db.create_tables(models)
运行后生成的数据表: