"""Fetch a web page with requests and print its structure parsed by BeautifulSoup.

Originally a university-ranking scraping exercise; the table-row collector
(`fillunivlist`) is kept even though the demo URL has no ranking table.
"""
import requests
from bs4 import BeautifulSoup

# Accumulates one list of <td> strings per table row, across all calls
# to fillunivlist().  Module-level on purpose: the original script shares
# this list between functions.
alluniv = []


def getHTMLText(url):
    """Return the decoded body of *url*, or the sentinel string "error" on failure.

    Keeps the original "error"-string contract so existing callers that
    print or parse the return value keep working.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx into an exception -> "error" path
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # Was a bare `except:` which also swallowed KeyboardInterrupt and
        # SystemExit; narrowed to request-related errors only.
        return "error"


def xunhuang(url):
    """Fetch *url* 20 times, discarding each result.

    NOTE(review): looks like a load/warm-up exercise from the original
    tutorial — the responses are intentionally unused.
    """
    for _ in range(20):
        getHTMLText(url)


def fillunivlist(soup):
    """Append the cell strings of every non-empty <tr> in *soup* to alluniv."""
    for tr in soup.find_all('tr'):
        ltd = tr.find_all('td')
        if not ltd:  # header/separator rows carry no <td> cells
            continue
        alluniv.append([td.string for td in ltd])


def printf():
    """Print three blank-line separators between output sections."""
    print("\n")
    print("\n")
    print("\n")


def main():
    """Download the demo page, print it, then print title/head/body sections."""
    url = "http://www.google.com"
    html = getHTMLText(url)
    xunhuang(url)
    print(html)
    soup = BeautifulSoup(html, "html.parser")
    fillunivlist(soup)
    print(html)
    printf()
    print(soup.title)
    printf()
    print(soup.head)
    printf()
    print(soup.body)


if __name__ == "__main__":
    # Guarded so that importing this module no longer fires 21 network
    # requests as a side effect (the original called main() unconditionally).
    main()
requests库访问网站
猜你喜欢
转载自 https://www.cnblogs.com/double-star/p/12916367.html
今日推荐
周排行