天气爬虫

import  requests
import re
# Site root; the link paths scraped by get_url are appended to this.
BASE_URL = 'http://www.tianqihoubao.com'

# Year+month prefix (YYYYMM) of the daily pages requested by get_response.
YEAR_MONTH = '201803'

# Seconds to wait for each HTTP request; without a timeout a stalled server
# would block the crawl forever.
REQUEST_TIMEOUT = 10

# Defined at module level (not only under the __main__ guard) so get_url and
# get_response also work when this module is imported. The lowercase name is
# kept because both functions have always looked it up as a global.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/63.0.3239.132 Safari/537.36'}

# Compiled once here instead of on every loop iteration.
_LINK_RE = re.compile("<li><a href='(.*?).html'>.*?</a></li>", re.S)
_CITY_RE = re.compile('<meta name="Keywords" content="(.*?)" />')
_LOW_RE = re.compile(' <td style="color:#E54600" ><b>(.*?)</b></td>')
_HIGH_RE = re.compile('<td style="color:#000065"><b>(.*?)</b></td>')


def get_url(url):
    """Download an index page and return the link paths listed on it.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of strings, one per ``<li><a href='...html'>`` entry on the
        page, each holding the href text that precedes the ``.html`` suffix.
        The list is empty when nothing matches.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # The body is decoded as GBK. Undecodable bytes are replaced instead of
    # raising UnicodeDecodeError, so one bad byte cannot abort the crawl.
    data = resp.content.decode('gbk', errors='replace')
    return _LINK_RE.findall(data)


def _parse_day_page(html):
    """Return (city, low, high) from one daily page, or None if any is absent."""
    city = _CITY_RE.search(html)
    low = _LOW_RE.search(html)
    high = _HIGH_RE.search(html)
    if city is None or low is None or high is None:
        return None
    return city.group(1), low.group(1), high.group(1)


def get_response(link):
    """Fetch the daily pages for each link path and print what they contain.

    For every path in ``link`` the pages
    ``BASE_URL + path + '/' + YEAR_MONTH + DD + '.html'`` are requested for
    DD = 01..31. For each page the first match of the keywords meta tag and
    of the two coloured table cells is printed as ``city low high``.
    (The low/high names come from the original variables; presumably they
    are temperatures, confirm against the live page.)

    Pages on which any of the three patterns is missing are skipped rather
    than raising IndexError.

    Args:
        link: Iterable of link paths as returned by get_url.

    Raises:
        requests.RequestException: If an HTTP request fails or times out.
    """
    for path in link:
        for day in range(1, 32):
            # {:02d} zero-pads days 1-9, giving e.g. 20180301 ... 20180331.
            url = '{}{}/{}{:02d}.html'.format(BASE_URL, path, YEAR_MONTH, day)
            resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            record = _parse_day_page(resp.text)
            if record is None:
                continue
            print(*record)


if __name__ == '__main__':
    url = 'http://www.tianqihoubao.com/weather/province.aspx?id=330000'
    link = get_url(url)
    get_response(link)

猜你喜欢

转载自www.cnblogs.com/snackpython/p/10136471.html
今日推荐