Python 爬虫获取双色球历史中奖记录并写入数据库


from datetime import datetime
import pymysql
import requests
import time
import re
import urllib.request
import os
import json
from bs4 import BeautifulSoup
# Scrape the historical "double colour ball" (SSQ) lottery draws from
# datachart.500.com and store them in the local MySQL table `caipiao`.

# Legacy settings kept from the original script; the scraper does not use them.
pathFinal = "C:\\Users\\gao\\Desktop\\mysqlConnectionDown"
url = "https://datachart.500.com/ssq/history/newinc/history.php?limit=100&sort=0"
# History page that is actually downloaded (issue range 1001..19019).
url2 = "http://datachart.500.com/ssq/history/newinc/history.php?start=1001&end=19019"

# Timeout (seconds) passed to requests.get; value kept from the original script.
REQUEST_TIMEOUT = 500

# Connection settings kept from the original script.
# NOTE(review): credentials are hard-coded; consider moving them to configuration.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'database': 'aaa',
    'charset': 'utf8',
}

# Placeholders are filled in by the driver, so scraped text never becomes SQL.
INSERT_SQL = (
    "INSERT INTO caipiao(qi,hong1,hong2,hong3,hong4,hong5,hong6,lan,riqi) "
    "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)

# Patterns are compiled once here instead of on every row.
_ISSUE_RE = re.compile(r'<tr class="t_tr1"><!--<td>2</td>--><td>(.*?)</td>')
_RED_RE = re.compile(r'<td class="t_cfont2">(.*?)</td>')
_BLUE_RE = re.compile(r'<td class="t_cfont4">(.*?)</td>')
_DATE_RE = re.compile(r'</td><td>([^<]*?)</td></tr>')


def parse_row(row_html):
    """Parse the HTML of one ``<tr>`` into a tuple ready for insertion.

    Args:
        row_html: The serialized ``<tr>`` element as a string.

    Returns:
        ``(qi, hong1, ..., hong6, lan, riqi)`` where the first eight items are
        ints and ``riqi`` is a ``datetime``; ``None`` when the row does not
        contain an issue number, six red balls, a blue ball and a date
        (for example the empty separator rows in the table).

    Raises:
        ValueError: If a matched cell is not numeric or the date is not in
            ``%Y-%m-%d`` format.
    """
    issue = _ISSUE_RE.findall(row_html)
    reds = _RED_RE.findall(row_html)
    blue = _BLUE_RE.findall(row_html)
    draw_date = _DATE_RE.findall(row_html)
    if not issue or len(reds) < 6 or not blue or not draw_date:
        return None

    # The original script interpolated these values unquoted into the SQL,
    # i.e. as numeric literals; converting to int keeps the stored values equal.
    numbers = [int(issue[0])]
    numbers.extend(int(ball) for ball in reds[:6])
    numbers.append(int(blue[0]))
    return tuple(numbers) + (datetime.strptime(draw_date[0], "%Y-%m-%d"),)


def fetch_history_html(source_url=url2, timeout=REQUEST_TIMEOUT):
    """Download the history page and return its text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(source_url, timeout=timeout)
    response.raise_for_status()
    return response.text


def extract_draws(html):
    """Return the parsed draws found in the ``id="tdata"`` element of *html*.

    Raises:
        RuntimeError: If the page has no element with ``id="tdata"``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table_body = soup.find(id='tdata')
    if table_body is None:
        raise RuntimeError("element with id='tdata' not found in the page")

    parsed = (parse_row(str(tr)) for tr in table_body.find_all('tr'))
    return [draw for draw in parsed if draw is not None]


def save_draws(draws):
    """Insert *draws* into ``caipiao`` in one transaction; return the row count."""
    if not draws:
        return 0

    conn = pymysql.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            cursor.executemany(INSERT_SQL, draws)
        finally:
            cursor.close()
        conn.commit()
    except Exception:
        # Leave the table untouched when any insert fails, then re-raise.
        conn.rollback()
        raise
    finally:
        conn.close()
    return len(draws)


def main():
    """Fetch the history page, parse every draw and write them to MySQL."""
    draws = extract_draws(fetch_history_html())
    saved = save_draws(draws)
    print('inserted %d rows' % saved)


if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/qq_35464063/article/details/87911860