获取历史天气数据并存储到 MySQL

# -*- coding:utf-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import pandas as pd

import pymysql

# Open the MySQL connection and cursor shared by the rest of this script.
# NOTE(review): host and credentials are hard-coded (root with an empty
# password); move them to configuration before using this outside a local
# test database.
conn = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='',
    database='weather',
    # The scraped text is Chinese; set the charset explicitly instead of
    # relying on the driver's default.
    charset='utf8mb4',
)
cursor = conn.cursor()  # cursor used to execute SQL statements


def year_month(start_year=2017, end_year=2018, city='tianjin'):
    """Build the monthly history-page URLs for a city on lishi.tianqi.com.

    Args:
        start_year: First year to include.
        end_year: Year at which to stop; exclusive, as with ``range``.
        city: City slug placed in the URL path.

    Returns:
        A list of URL strings in chronological order, one per month, of the
        form ``http://lishi.tianqi.com/<city>/<YYYYMM>.html``. With the
        default arguments this is the 12 months of 2017 for Tianjin.
    """
    base = 'http://lishi.tianqi.com/' + city + '/'
    # ``{:02d}`` zero-pads single-digit months (1 -> "01").
    return [
        '{}{}{:02d}.html'.format(base, year, month)
        for year in range(start_year, end_year)
        for month in range(1, 13)
    ]

url_list = year_month()
# Original author's note: at the time of writing, url_list[0:79] could be
# fetched. NOTE(review): year_month() as written only covers 2017, so the list
# holds 12 URLs and slices up to 79 simply stop at the end of the list.
# print(url_list[1:5])


def weather_scraping(url):
    """Fetch a history page and return the <ul> rows of its weather table.

    Args:
        url: Address of a monthly history page.

    Returns:
        The result of ``find_all('ul')`` on the first ``<div>`` whose CSS
        class is ``tqtongji2``; each ``<ul>`` is one row of the table.

    Raises:
        ValueError: If the page contains no ``div.tqtongji2`` element.
        urllib.error.URLError: If the page cannot be fetched.
    """
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(url) as response:
        page = response.read()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    bsObj = BeautifulSoup(page, 'html.parser')
    div = bsObj.find("div", 'tqtongji2')
    if div is None:
        raise ValueError('no div.tqtongji2 section found at %s' % url)
    return div.find_all('ul')


# Fetch the first month's page once (performs a network request at import time).
weul = weather_scraping(url_list[0])
# The result of this check is discarded, so the line has no effect when run as
# a script; presumably a leftover from an interactive session.
isinstance(weul , list)

def analysis_ul(url, city_id=734):
    """Parse one monthly page, insert its rows into MySQL, return a DataFrame.

    The first ``<ul>`` returned by ``weather_scraping`` supplies the column
    headers; every following ``<ul>`` is one data row whose ``<li>`` texts
    are the cell values.

    Each data row is inserted into ``app01_cond`` through the module-level
    ``cursor``. The insert is not committed here; the caller commits.

    Args:
        url: Address of a monthly history page.
        city_id: Value stored in the ``c_id_id`` column for every row.

    Returns:
        A ``pandas.DataFrame`` with one row per data ``<ul>`` and columns
        named after the header row.

    Raises:
        ValueError: If the second or third cell of a row is not an integer.
        IndexError: If a row has fewer than four cells or no header row
            exists.
    """
    ul = weather_scraping(url)

    # Let the driver bind the values instead of formatting them into the SQL
    # text: the cells come from a web page and may contain quotes.
    sql = ("INSERT INTO app01_cond(ti,max_c,min_c,con,c_id_id) "
           "VALUES(%s,%s,%s,%s,%s)")

    weather_list = []
    for ul_text in ul[1:]:
        day_data = [li.get_text() for li in ul_text.find_all('li')]
        # Cells 0-3 map to ti, max_c, min_c, con (presumably date, high,
        # low and weather condition -- confirm against the table schema).
        cursor.execute(
            sql,
            (day_data[0], int(day_data[1]), int(day_data[2]),
             day_data[3], city_id),
        )
        weather_list.append(day_data)

    columns = [li.get_text() for li in ul[0].find_all('li')]
    return pd.DataFrame(weather_list, columns=columns)

# Scrape every month, combine the results, then commit the inserts.
try:
    # url_list[:79] covers the same pages as the original first element plus
    # url_list[1:79].
    monthly_frames = [analysis_ul(month_url) for month_url in url_list[:79]]
    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame on every iteration.
    weather_data = pd.concat(monthly_frames, ignore_index=True)

    # Optionally save the data to a local file:
    # weather_data.to_csv('weather_201101_201707.txt', index=False, sep=',')
    # weather_data.head(7)

    conn.commit()   # persist the rows inserted by analysis_ul
finally:
    # Release the cursor and connection even if scraping or inserting failed;
    # closing without a commit leaves the uncommitted inserts unsaved.
    cursor.close()
    conn.close()





猜你喜欢

转载自blog.csdn.net/weixin_42100915/article/details/80913943