python爬取并得到有用历史天气信息,并添加到数据库中

程序说明:

**因为2月只有28或29天,而1、3、5、7、8、10、12月有31天,分三种情况遍历太麻烦,所以这段代码只爬取了除2月以外其他月份前30天的天气情况。
要想爬取完整的数据,请修改 for i in range(1,31): 中的31(range 不包含上界):28天的月份改为29,29天的月份改为30,31天的月份改为32,这样就可以爬取完整的天数。**
#/*
#       网络数据获取
#       Time:2020-06-01
# */
import urllib.request
import bs4
from bs4 import BeautifulSoup
import re
import time
# Wall-clock start of the run; subtracted from the end time at the bottom of
# the script to report the total elapsed seconds.
StartTime=time.time()
import MySQLdb
# Filled by MysqlConnection(): receives the first column of every row that a
# statement returns.
URL=[]
def MysqlConnection(SQL, params=None):
    """Execute one SQL statement on the local MySQL database and commit it.

    A fresh connection is opened for every call and is always closed again.
    If the statement returns rows, the first column of each row is appended
    to the module-level ``URL`` list.

    Errors raised while executing are reported on stdout and the transaction
    is rolled back; they are not re-raised, so a failing statement does not
    abort the caller.

    Args:
        SQL: The statement to run. May contain ``%s`` placeholders.
        params: Optional sequence of values for the placeholders. They are
            bound by the driver, which handles quoting and escaping, so
            values must not be interpolated into ``SQL`` by hand.
    """
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration or environment variables.
    conn = MySQLdb.connect(host="localhost", user="root", password="lulianghao",
                           db="studentmysql", port=3306, charset='utf8')
    try:
        cursor = conn.cursor()
        cursor.execute(SQL, params)
        conn.commit()
        # DB-API: description is None when the statement produced no result
        # set (e.g. INSERT), in which case there is nothing to fetch.
        if cursor.description is not None:
            for row in cursor.fetchall():
                URL.append(row[0])
    except Exception as e:
        conn.rollback()
        # Keep the best-effort behaviour, but never fail silently.
        print("MySQL statement failed:", SQL, "->", e)
    finally:
        conn.close()


# Explicit page encoding. The former 'ANSI' codec name only exists on Windows
# and resolves to whatever the system code page happens to be.
PAGE_ENCODING = 'gbk'

# Template of a monthly history page; every URL built from it is 59 characters.
MONTH_URL = "http://www.tianqihoubao.com/lishi/xingtai/month/%s.html"

# Months that are not crawled: every February from 2011 to 2020 (fewer than
# the 30 rows read per page) plus 202006 (presumably still incomplete when
# the script was written - TODO confirm).
SKIPPED_URLS = {MONTH_URL % month
                for month in ["202006"] + ["%d02" % year for year in range(2011, 2021)]}

WHITESPACE = re.compile(r'\s+')
# Splits a table row at '日' / '℃' (and at '"' / ',' should they occur).
FIELD_SEPARATORS = re.compile(r'["日",℃]')

# Load the stored page URLs, oldest first, into URL.
MysqlConnection("select * from url order by Time asc")

with urllib.request.urlopen("http://www.tianqihoubao.com/lishi/xingtai.html") as response:
    html = response.read().decode(PAGE_ENCODING)
Div = BeautifulSoup(html, 'html.parser')
# NOTE(review): GetText, GetA and GetA.nums are not read again below; they
# are kept only so the script's module-level state stays as before.
GetText = Div.find_all('div', class_='box pcity')
GetA = Div.find_all('a')
GetA.nums = len(GetA[15:])

# Keep only URLs with the length of a monthly page that are not skipped.
url = [link for link in URL if len(link) == 59 and link not in SKIPPED_URLS]

for lh in url:
    with urllib.request.urlopen(lh) as response2:  # request the monthly page
        html2 = response2.read().decode(PAGE_ENCODING)
    Div2 = BeautifulSoup(html2, 'html.parser')
    GetText2 = Div2.find_all('tr')
    # Row 0 is skipped (presumably the table header - TODO confirm); rows
    # 1..30 are read. Raise the upper bound to read more rows per page.
    for i in range(1, 31):
        if i >= len(GetText2):
            # Page has fewer rows than expected: stop instead of crashing.
            break
        # \s also matches the non-breaking spaces (\xa0) used in the cells.
        row_text = WHITESPACE.sub('', GetText2[i].text)
        result = FIELD_SEPARATORS.split(row_text)
        if len(result) < 4:
            print("Skipping row that could not be parsed:", row_text)
            continue
        # Re-attach the separators that the split consumed.
        a = result[0] + "日"
        b = result[1] + "℃" + result[2] + "℃"
        c = result[3]
        print(a)
        print(b)
        print(c)
        # Values come from a web page: let the driver bind them instead of
        # formatting them into the SQL text.
        MysqlConnection("insert into tianqiyubao2(time,second,three) values(%s,%s,%s)", (a, b, c))
EndTime=time.time()
print("程序共消耗时间:",EndTime-StartTime,"s")
print("数据添加数据库成功,请查看!!!")
只做记录学习
(qq:九七二四三九三二九,有不懂的可以问我)

猜你喜欢

转载自blog.csdn.net/weixin_45005209/article/details/106619679
今日推荐