# Uses the requests module to fetch web-page content, bs4 (BeautifulSoup) to
# parse and clean it, and pandas to save the data in CSV format.
import pandas as pd


def build_frame(row_tokens):
    """Assemble the weather table from whitespace-split row texts.

    Parameters
    ----------
    row_tokens : iterable of list[str]
        One token list per table row. Token 0 is the date, tokens 1:3
        the weather condition, tokens 3:6 the temperature range
        (matches the slicing in the original script).

    Returns
    -------
    pandas.DataFrame with columns 'date', 'temperature', 'weather'.
    """
    dates, temperatures, conditions = [], [], []
    for tokens in row_tokens:
        dates.append(tokens[0])
        temperatures.append(''.join(tokens[3:6]))
        conditions.append(''.join(tokens[1:3]))
    frame = pd.DataFrame()
    frame['date'] = dates
    frame['temperature'] = temperatures
    frame['weather'] = conditions
    return frame


def get_data(url):
    """Download one monthly weather-history page and return it as a DataFrame.

    Fetches *url*, decodes the GBK-encoded HTML, extracts every <tr> row
    (skipping the header row) and builds the date/temperature/weather table.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status, instead of silently
        parsing an error page.
    """
    # Imported lazily so the module can be imported (and build_frame used)
    # without the third-party HTTP/HTML dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    res = requests.get(url)
    res.raise_for_status()  # fail loudly on HTTP errors
    # tianqihoubao.com serves GBK-encoded pages — decode explicitly.
    html = res.content.decode('gbk')
    soup = BeautifulSoup(html, 'html.parser')
    tr_list = soup.find_all('tr')
    # tr_list[0] is the table header; each data row's text splits into tokens.
    return build_frame(row.text.split() for row in tr_list[1:])


if __name__ == '__main__':
    # Fetch September–November 2019 for Beijing, stitch the months together
    # with pd.concat, and save the combined table as CSV.
    months = [
        get_data('http://www.tianqihoubao.com/lishi/beijing/month/%d.html' % ym)
        for ym in (201909, 201910, 201911)
    ]
    combined = pd.concat(months).reset_index(drop=True)
    combined.to_csv('BeiJing.csv', index=False, encoding='utf-8')