Python crawler: crawling weather forecast content

Use the requests module to fetch the web page content, bs4 to clean the data, and pandas to save the data, typically in CSV format.
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_data(url):
    # request the web page
    res = requests.get(url)
    # decode the binary content (the site serves GBK-encoded pages)
    html = res.content.decode('gbk')
    # parse the page
    soup = BeautifulSoup(html, 'html.parser')
    # use soup.find_all to grab every table row
    tr_list = soup.find_all('tr')
    # containers for the extracted data
    dates = []
    temps = []
    conditions = []
    # clean the data, skipping the header row
    for data in tr_list[1:]:
        rel_data = data.text.split()
        dates.append(rel_data[0])                  # date
        temps.append(''.join(rel_data[3:6]))       # temperature
        conditions.append(''.join(rel_data[1:3]))  # weather condition
    biaoge = pd.DataFrame()
    biaoge['date'] = dates
    biaoge['temperature'] = temps
    biaoge['weather'] = conditions
    return biaoge

month9 = get_data('http://www.tianqihoubao.com/lishi/beijing/month/201909.html')
month10 = get_data('http://www.tianqihoubao.com/lishi/beijing/month/201910.html')
month11 = get_data('http://www.tianqihoubao.com/lishi/beijing/month/201911.html')
# use pd.concat to stitch September, October and November together
v = pd.concat([month9, month10, month11]).reset_index(drop=True)
# save the data in CSV format
v.to_csv('BeiJing.csv', index=False, encoding='utf-8')
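
The three nearly identical calls above can be folded into a loop. Below is a minimal sketch, assuming the site keeps the same URL pattern (lishi/<city>/month/<YYYYMM>.html) for other cities and months; get_months is a hypothetical helper of mine, not part of the original script:

def get_months(city, months):
    # fetch each month's page and collect the resulting DataFrames
    frames = []
    for m in months:
        url = 'http://www.tianqihoubao.com/lishi/%s/month/%s.html' % (city, m)
        frames.append(get_data(url))
    # stitch all months together, renumbering the index
    return pd.concat(frames).reset_index(drop=True)

# equivalent to the three explicit calls above
v = get_months('beijing', ['201909', '201910', '201911'])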
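
To confirm the CSV was written correctly, a quick read-back with pandas (a sanity check of my own, not from the original post):

check = pd.read_csv('BeiJing.csv', encoding='utf-8')
print(check.shape)   # expect about 91 rows: 30 + 31 + 30 days
print(check.head())  # first few rows: date, temperature, weather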