# coding=utf-8
"""Collect host names from Baidu search-result pages for a given query."""
import re
from urllib import parse

import requests

# User-Agent header sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
}

# Captures the text between the link-style attribute of a result entry and
# the next "/" in the returned HTML. Compiled once, reused for every page.
# NOTE(review): this depends on Baidu's current result markup — confirm it
# still matches before relying on the output.
_DOMAIN_PATTERN = re.compile('style="text-decoration:none;">(.*?)/')


def baidu_get_towdn(site, a, b, timeout=10):
    """Search Baidu for *site* and print the distinct matches found.

    Result pages ``a`` (inclusive) through ``b`` (exclusive) are fetched;
    for example ``(0, 10)`` fetches ten pages. Every regex match from every
    page is collected, de-duplicated, printed as a list, followed by the
    count and then one match per line.

    Args:
        site: Search query, e.g. ``'site:baidu.cn'``. It is URL-encoded
            with ``parse.quote()`` before being placed in the URL.
        a: First page index to fetch.
        b: Page index to stop before.
        timeout: Seconds to wait for each HTTP request before
            ``requests`` raises, so a stalled connection cannot hang the
            scan indefinitely.

    Returns:
        The sorted list of distinct matches (also printed).

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            network failure or timeout.
    """
    found = []
    for page in range(a, b):
        # The page index is multiplied by 10 to form the ``pn`` query value.
        offset = page * 10
        url = 'https://www.baidu.com/s?wd=' + parse.quote(site) + '&pn=' + str(offset)
        html = requests.get(url, headers=headers, timeout=timeout).text
        found += _DOMAIN_PATTERN.findall(html)

    # De-duplicate via set(); sort so the output order is stable across runs.
    unique_sites = sorted(set(found))
    print(unique_sites)
    print("\nThe number of sites is %d" % len(unique_sites))
    for name in unique_sites:
        print(name)
    return unique_sites


# Example usage:
# baidu_get_towdn('site:baidu.cn', 0, 10)
python 用百度获取二级域名列表
猜你喜欢
转载自www.cnblogs.com/wjbk/p/11300717.html
今日推荐
周排行