程序说明:
**因为 2 月只有 28 或 29 天,而 1、3、5、7、8、10、12 月有 31 天,分三种情况遍历太麻烦,所以这段代码只爬取除 2 月以外各月份前 30 天的天气情况。
如果想爬取完整的天数,请修改 for i in range(1,31): 中的 31(注意 range 的上限本身不包含在内):28 天的月份改为 29,29 天的月份改为 30,31 天的月份改为 32,就可以爬取完整的天数。**
import urllib.request
import bs4
from bs4 import BeautifulSoup
import re
import time
# Wall-clock start of the run; subtracted from the end time at the bottom of
# the script to report the total elapsed time.
StartTime=time.time()
import MySQLdb
# Filled by MysqlConnection(): the first column of every row returned by a
# query is appended to this list.
URL=[]
def MysqlConnection(SQL, params=None):
    """Run one SQL statement against the local ``studentmysql`` database.

    A new connection is opened for every call and closed before returning.
    The statement is committed on success and rolled back on failure.  When
    the statement returns rows, the first column of each row is appended to
    the module-level ``URL`` list.

    Args:
        SQL: The SQL statement to execute.
        params: Optional values for the ``%s`` placeholders in ``SQL``; they
            are bound by the driver.  Prefer this to building the statement
            with string formatting, which breaks on values containing quotes
            and is open to SQL injection.  ``None`` (the default) executes
            ``SQL`` exactly as given.

    Returns:
        None.  A failing statement is reported on standard output and is not
        re-raised, so one bad statement does not abort the whole run.
    """
    # NOTE(review): credentials are hard-coded here; consider reading them
    # from the environment or a config file instead.
    conn = MySQLdb.connect(host="localhost", user="root", password="lulianghao",
                           db="studentmysql", port=3306, charset='utf8')
    cursor = conn.cursor()
    try:
        cursor.execute(SQL, params)
        conn.commit()
        # ``description`` is None for statements that produce no result set
        # (INSERT/UPDATE/...), in which case there is nothing to fetch.
        if cursor.description is not None:
            for row in cursor.fetchall():
                URL.append(row[0])
    except Exception as e:
        conn.rollback()
        # Previously the error was dropped silently, which hid failed inserts.
        print("MysqlConnection failed:", e)
    finally:
        cursor.close()
        conn.close()
# Load the stored page addresses (ordered by the Time column, ascending);
# MysqlConnection appends the first column of every row to URL.
MysqlConnection("select * from url order by Time asc")

# Download and parse the Xingtai history index page.
with urllib.request.urlopen("http://www.tianqihoubao.com/lishi/xingtai.html") as response:
    # 'ANSI' is only a codec alias on Windows, where it means "the system
    # code page", so the old call failed elsewhere and depended on locale.
    # NOTE(review): 'gbk' is what 'ANSI' resolves to on a Chinese-locale
    # Windows and presumably matches the site's encoding - confirm against
    # the page's declared charset.
    html = response.read().decode('gbk')
Div = BeautifulSoup(html, 'html.parser')
GetText = Div.find_all('div', class_='box pcity')
GetA = Div.find_all('a')
# Number of links after the first 15, stored as an attribute on the result
# list.  NOTE(review): not read anywhere in the visible part of the script.
GetA.nums = len(GetA[15:])
# Month pages that are deliberately left out of the crawl: every February
# from 2011 to 2020 (fewer than 30 daily rows) plus 2020-06.
# NOTE(review): 2020-06 was presumably the incomplete current month - confirm.
_SKIPPED_MONTH_PAGES = frozenset((
    "http://www.tianqihoubao.com/lishi/xingtai/month/202006.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201102.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201202.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201302.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201402.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201502.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201602.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201702.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201802.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/201902.html",
    "http://www.tianqihoubao.com/lishi/xingtai/month/202002.html",
))

# Keep only addresses that are exactly 59 characters long (the length of a
# ".../xingtai/month/YYYYMM.html" address) and are not on the skip list.
url = [
    address
    for address in URL
    if len(address) == 59 and address not in _SKIPPED_MONTH_PAGES
]
# Compiled once instead of on every table row.
# The character class splits on 日 and ℃, and also on a literal double quote
# or comma, because those characters are part of the original pattern.
_ROW_SPLIT_RE = re.compile(r'["日",℃]')
# Raw string: '\s' in a normal string literal is an invalid escape sequence.
_WHITESPACE_RE = re.compile(r'\s+')

# Visit every selected month page and store one database record per table row.
for lh in url:
    with urllib.request.urlopen(lh) as response2:
        # 'ANSI' is a Windows-only codec alias for the system code page; use
        # an explicit encoding so the script behaves the same everywhere.
        # NOTE(review): 'gbk' is presumably the site's encoding - confirm.
        html2 = response2.read().decode('gbk')
    Div2 = BeautifulSoup(html2, 'html.parser')
    GetText2 = Div2.find_all('tr')
    # Row 0 is skipped (presumably the table header); rows 1..30 are read.
    for i in range(1, 31):
        if i >= len(GetText2):
            # The table is shorter than 30 data rows; stop instead of
            # raising IndexError.
            break
        # Removing all whitespace also removes the non-breaking spaces
        # (\xa0), which \s matches for str patterns, so no separate
        # replace()/strip() step is needed.
        flattened = _WHITESPACE_RE.sub('', GetText2[i].text)
        result = _ROW_SPLIT_RE.split(flattened)
        if len(result) < 4:
            # Not the expected "<date>日<text>℃<text>℃<text>" shape.
            print("skipping unparsable row %d of %s" % (i, lh))
            continue
        # Re-attach the separator characters that split() consumed.
        a = result[0] + "日"
        b = result[1] + "℃" + result[2] + "℃"
        c = result[3]
        print(a)
        print(b)
        print(c)
        # NOTE(review): the statement is built with string formatting, so a
        # value containing a quote breaks it (SQL injection risk); bind the
        # values through the driver if MysqlConnection gains that ability.
        MysqlConnection("insert into tianqiyubao2(time,second,three) values('%s','%s','%s')" % (a, b, c))
# Report how long the whole run took, measured from StartTime.
EndTime = time.time()
elapsed_seconds = EndTime - StartTime
print("程序共消耗时间:", elapsed_seconds, "s")
# NOTE(review): this message is printed unconditionally; it does not check
# whether the database statements actually succeeded.
print("数据添加数据库成功,请查看!!!")
仅作学习记录之用。
(qq:九七二四三九三二九,有不懂的可以问我)