使用 requests 库抓取网页，并用 BeautifulSoup 解析其中的表格与标题/正文内容

 1 import requests
 2 from bs4 import BeautifulSoup
# Module-level accumulator: fillunivlist() appends one list of <td> cell
# strings per table row parsed from the fetched page.
 3 alluniv = []
 4 def getHTMLText(url):
 5     try:
 6         r = requests.get(url,timeout = 30)
 7         r.raise_for_status()
 8         r.encoding = 'utf-8'
 9         return r.text
10     except:
11         return "error"
def xunhuang(url):
    """Fetch *url* 20 times in a row, discarding every response.

    (Public name kept unchanged for callers; it transliterates 循环 "loop".)
    """
    for _ in range(20):
        getHTMLText(url)
def fillunivlist(soup):
    """Collect every table row of *soup* into the module-level ``alluniv``.

    Each <tr> that contains <td> cells contributes one list of the cells'
    ``.string`` values; rows without <td> (e.g. header rows) are skipped.
    """
    for row in soup.find_all('tr'):
        cells = row.find_all('td')
        if cells:  # skip rows with no <td> children
            alluniv.append([cell.string for cell in cells])
def printf():
    """Print a six-newline visual separator.

    Output is byte-identical to the original three ``print("\\n")`` calls:
    each printed "\\n" plus its own terminating newline (2 x 3 = 6).
    """
    # Five literal newlines + print's trailing newline = six total.
    print("\n" * 5)
def main():
    """Driver: fetch the page, hit it 20 more times, parse it, and dump
    the raw HTML plus the title/head/body sections."""
    url = "http://www.google.com"
    page = getHTMLText(url)
    xunhuang(url)  # 20 additional fetches of the same URL; results discarded
    print(page)
    parsed = BeautifulSoup(page, "html.parser")
    fillunivlist(parsed)
    print(page)
    printf()
    print(parsed.title)
    printf()
    print(parsed.head)
    printf()
    print(parsed.body)
# Guard the entry point so importing this module does not immediately
# trigger 21 network requests; behavior when run as a script is unchanged.
if __name__ == "__main__":
    main()

猜你喜欢

转载自www.cnblogs.com/double-star/p/12916367.html