好久没有更新博客园了,一直在我的 CSDN(https://blog.csdn.net/u013252962)更新,今天搬运一篇过来。最近打算刷数据库相关的题,但对于非会员来说,只能做每日一练,而我刷题的时间基本都在地铁上,所以将题目爬下来,用 Vue 做了个简单的页面。希望有同样需求的小伙伴来看看,别忘记点赞哦!
python源码(无框架)
# -*- coding: utf-8 -*-
import requests
from lxml import etree
import json
import re
import pandas as pd
import numpy as np
import pymysql
# Cookie header value; left empty in this listing.
# NOTE(review): presumably a logged-in session cookie has to be pasted here
# before the crawler is run -- confirm.
cookie = ""

# HTTP headers passed to every requests call made by the crawler.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.142 Safari/537.36"
    ),
    "Cookie": cookie,
}
# pymysql database helper
class MysqlAct(object):
    """Small wrapper around a single pymysql connection and cursor.

    Every query method accepts an optional ``args`` value that is handed to
    ``cursor.execute`` so the driver binds the values itself. Calling a
    method with only a SQL string behaves as it always did.
    """

    def __init__(self):
        # PyMySQL 1.0+ no longer accepts positional connect() arguments,
        # so every setting is passed by keyword.
        self.connect = pymysql.connect(
            host='localhost',
            user='root',
            password='root',
            database='tpcommon',
            use_unicode=True,
            # The tables in the accompanying DDL are declared utf8mb4.
            charset='utf8mb4',
        )
        self.cursor = self.connect.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def select(self, sql, args=None):
        """Run a query and return all fetched rows."""
        self.cursor.execute(sql, args)
        return self.cursor.fetchall()

    def find(self, sql, args=None):
        """Run a query and return the cursor's row count."""
        self.cursor.execute(sql, args)
        return self.cursor.rowcount

    def insert(self, sql, args=None):
        """Run an INSERT statement and commit."""
        self.cursor.execute(sql, args)
        self.connect.commit()

    def update(self, sql, args=None):
        """Run an UPDATE statement and commit."""
        self.cursor.execute(sql, args)
        self.connect.commit()

    def close(self):
        """Close the cursor and then the connection."""
        try:
            self.cursor.close()
        finally:
            self.connect.close()

    # Original (misspelled) name, kept so existing callers continue to work.
    colose = close
# Crawler
class Spider(object):
    """Three-stage crawler that copies exam papers and questions into MySQL.

    The stages are meant to run in order: ``GetClist`` stores the paper list,
    ``GetChecks`` starts an exam for every stored paper and records the id
    the site returns, and ``GetQes`` downloads the questions for every
    recorded id.
    """

    # Seconds to wait on a single HTTP request before giving up.
    REQUEST_TIMEOUT = 30
    # At most this many questions are stored per paper (the original loop
    # was hard-coded to exactly 10 and crashed on shorter papers).
    QUESTIONS_PER_PAPER = 10

    def __init__(self):
        self.page = 1

    @staticmethod
    def _execute(mysql, sql, params):
        """Execute one statement with driver-bound parameters and commit."""
        # Goes through the wrapper's cursor/connection attributes so values
        # are escaped by pymysql instead of being formatted into the SQL.
        mysql.cursor.execute(sql, params)
        mysql.connect.commit()

    @staticmethod
    def _load_model(response):
        """Return the ``model`` member of a JSON response, or None if unusable."""
        try:
            return json.loads(response.content)['model']
        except (ValueError, KeyError, TypeError):
            return None

    @staticmethod
    def _parse_list_item(item):
        """Return ``(title, tcid)`` for one list entry, or None if incomplete."""
        titles = item.xpath(".//div[@class='ecv2_tikucom_doTitle ecv2_marginbottom16']/text()")
        links = item.xpath(".//a//@href")
        if not titles or not links:
            return None
        link = links[0]
        if link == 'javascript:;':
            # No real href on this anchor; the id is read from data-accessid.
            access_ids = item.xpath(".//a//@data-accessid")
            if not access_ids:
                return None
            link = access_ids[0]
        match = re.search(r'\d+', link)
        if match is None:
            return None
        return str(titles[0]), match.group()

    @staticmethod
    def _quoted(text):
        """Wrap ``text`` in single quotes, treating None as empty."""
        # NOTE(review): the original string-built INSERT appears to have
        # stored these wrapping quotes as part of the value; they are kept so
        # new rows match existing ones -- confirm whether consumers need them.
        return "'" + (text or "") + "'"

    @staticmethod
    def _question_row(entry, checkid):
        """Build the tuple inserted into fcxlt_a_ruankao_shiti for one entry."""
        shiti = entry['shiti']
        return (
            Spider._quoted(shiti['tigan']),
            Spider._quoted(shiti['analysis']),
            Spider._quoted(shiti['questionDelHTMLTag']),
            Spider._quoted(shiti['answerStr']),
            shiti['questionNum'],
            checkid,
            shiti['id'],
        )

    # Fetch the raw paper list
    def GetClist(self, totalpage=2):
        """Crawl list pages ``self.page``..``totalpage`` and store each paper.

        Entries without a title, link or numeric id are skipped.
        """
        sql = "insert into fcxlt_a_ruankao_list (title, tcid) VALUES (%s, %s)"
        mysql = MysqlAct()
        try:
            while self.page <= totalpage:
                print("begin----", self.page)
                classurl = "url-{}.html".format(self.page)
                r = requests.get(classurl, headers=header, timeout=self.REQUEST_TIMEOUT)
                html = etree.HTML(r.content)
                items = [] if html is None else html.xpath("//div[@class='ecv2_tikucom_doItem clearfix']")
                for item in items:
                    row = self._parse_list_item(item)
                    if row is None:
                        continue
                    self._execute(mysql, sql, row)
                self.page = self.page + 1
            print("over---", self.page)
        finally:
            mysql.colose()

    # Fetch the test-report ids and store them
    def GetChecks(self, only_missing=False):
        """Start an exam for each stored paper and record the returned id.

        Args:
            only_missing: when true, only rows whose ``checkid`` is still
                NULL are processed, so rows that failed on an earlier run
                (for example a 404) can be retried.
        """
        sql = "select id, tcid from fcxlt_a_ruankao_list"
        if only_missing:
            sql += " where checkid is null"
        sql += " order by id asc"
        upsql = "UPDATE fcxlt_a_ruankao_list SET checkid = %s, checkurl = %s WHERE id = %s"
        url = "https://uc.educity.cn/ucapi/uc/paper/startExam.do"
        mysql = MysqlAct()
        try:
            for row_id, tcid in mysql.select(sql):
                r = requests.post(url, data={'tcId': tcid, 'model': 'Exam'},
                                  headers=header, timeout=self.REQUEST_TIMEOUT)
                model = self._load_model(r)
                newid = model.get('data') if isinstance(model, dict) else None
                if newid is None:
                    # Error page or unexpected payload: leave the row for a retry.
                    print("skip----", row_id)
                    continue
                teata = "https://uc.educity.cn/tiku/testReport.html?id=" + str(newid)
                self._execute(mysql, upsql, (newid, teata, row_id))
        finally:
            mysql.colose()

    # Fetch the questions and answers
    def GetQes(self):
        """Download the questions of every paper that has a ``checkid``."""
        sql = "select checkid from fcxlt_a_ruankao_list order by id asc"
        insert_sql = (
            "insert into fcxlt_a_ruankao_shiti "
            "(title,ansy,xuanxiang,answer,num,checkid,shitiid) "
            "VALUES (%s,%s,%s,%s,%s,%s,%s)"
        )
        url = "https://uc.educity.cn/ucapi/uc/testPaperLog/loadShitiLogByTestId.do"
        mysql = MysqlAct()
        try:
            for (checkid,) in mysql.select(sql):
                if checkid is None:
                    # GetChecks has not produced an id for this paper yet.
                    continue
                r = requests.post(url, data={'paperLogId': checkid},
                                  headers=header, timeout=self.REQUEST_TIMEOUT)
                model = self._load_model(r)
                if not isinstance(model, list):
                    print("skip----", checkid)
                    continue
                for entry in model[:self.QUESTIONS_PER_PAPER]:
                    try:
                        row = self._question_row(entry, checkid)
                    except (KeyError, TypeError):
                        # Malformed question entry; keep going with the rest.
                        print("skip----", checkid)
                        continue
                    # TODO: nothing prevents the same shitiid from being
                    # inserted twice when this stage is re-run.
                    self._execute(mysql, insert_sql, row)
        finally:
            mysql.colose()
if __name__ == '__main__':
    # Build the crawler. The three stages below are left commented out and
    # are enabled by hand, one at a time, in the order shown.
    spider = Spider()
    # spider.GetClist(30)  # step 1
    # spider.GetChecks()   # step 2
    # spider.GetQes()      # step 3
mysql建表语句
-- One row per exam paper found by the list crawl.
CREATE TABLE `fcxlt_a_ruankao_list` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  -- Title text scraped from the list page.
  `title` varchar(255) DEFAULT NULL,
  -- Numeric id extracted from the paper's link.
  `tcid` int(11) DEFAULT NULL,
  -- Id returned by the startExam.do call for this paper.
  `checkid` int(11) DEFAULT NULL,
  -- Test report URL built from checkid.
  `checkurl` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4;
-- One row per question downloaded for a paper.
CREATE TABLE `fcxlt_a_ruankao_shiti` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  -- Not written by the crawler's INSERT statement.
  `listid` int(11) DEFAULT NULL,
  -- Question stem (API field `tigan`).
  `title` text,
  -- Answer string (API field `answerStr`).
  `answer` varchar(255) DEFAULT NULL,
  -- Explanation (API field `analysis`).
  `ansy` text,
  -- API field `questionDelHTMLTag`.
  `xuanxiang` text,
  -- API field `questionNum`.
  `num` int(11) DEFAULT NULL,
  -- checkid of the paper this question was loaded for.
  `checkid` int(11) DEFAULT NULL,
  -- API field `id` of the question.
  `shitiid` int(11) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4;