# 1. 普通的<央视新闻>微博主页爬虫 (plain crawler for the CCTV News Weibo home page)
import urllib.request
import re
import urllib.error
# 1. Pose as a desktop browser: every later urllib.request.urlopen() call
#    goes through this opener and therefore sends the User-Agent below.
opener = urllib.request.build_opener()
# (header name, header value) pair; the value is a Google Chrome UA string.
headers = (
    'User-Agent',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
)
# Assigning the list replaces the opener's default header list entirely.
opener.addheaders = [headers]
# Make this opener the process-wide default used by urlopen().
urllib.request.install_opener(opener)
# 2. Download the account's timeline one page at a time.
#    range(1, 2) yields only page 1; widen the range to fetch more pages.
for i in range(1, 2):
    url = 'https://weibo.com/cctvxinwen?is_search=0&is_all=1&page=' + str(i)
    # The with-block closes the HTTP response as soon as the body is read.
    with urllib.request.urlopen(url) as response:
        # Decode as UTF-8 and drop undecodable bytes instead of raising.
        pagedata = response.read().decode("utf-8", "ignore")
    # print(len(pagedata))  # debug: size of the downloaded page