# 判断该url下面是否包含指定的文件 (check whether each URL hosts the specified file)

'''
1.读取本地url列表
2.逐个访问url
3.判断该url下面是否包含指定的文件
4.如果包含,将该url写入本地,如果不包含,则去除该url
'''
import requests
import multiprocessing


class Check_file(object):
    """Read a local URL list, probe each URL for a target file
    (``index.php``), and append the URLs that serve it to ``../file/file.txt``.

    Work is split between a producer process (reads the URL file into a
    queue) and a consumer process (issues the HTTP requests).
    """

    def __init__(self):
        # Browser-like User-Agent so probed hosts are less likely to
        # reject the request as a bot.
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        }

    @staticmethod
    def _normalize_url(raw):
        """Return *raw* as a fully-qualified base URL ending in '/'.

        Strips surrounding whitespace (lines read from the URL file keep
        their trailing newline) and prepends ``http://`` when no scheme
        is present.
        """
        url = raw.strip()
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        if not url.endswith('/'):
            url += '/'
        return url

    # 读取本地url列表并返回
    def read_local_file(self, q):
        """Producer: push every line of ../url/url.txt onto queue *q*."""
        with open('../url/url.txt', 'r', encoding='utf-8') as f:
            # Iterate the file lazily instead of materializing readlines().
            for url in f:
                q.put(url)
        print('数据读取完毕')

    # 逐个访问url,并判断数据是否存在
    def request_url(self, q):
        """Consumer: pull URLs from *q*, request <url><suffix>, and record hits.

        A hit is an HTTP 200 response whose decoded body contains
        'define' (heuristic for an exposed PHP config/index file); hits
        are appended to ../file/file.txt.

        Fixes vs. the original: the dequeued string was iterated
        character-by-character, and URLs already starting with
        'http://' left ``url_one`` undefined (NameError).
        NOTE(review): the ``q.empty()`` exit check still races with the
        producer (kept to preserve the original control flow) — the
        consumer may exit early or block in ``q.get()`` forever.
        """
        suffixes = ['index.php']
        while True:
            base = self._normalize_url(q.get())
            for suffix in suffixes:
                target = base + suffix
                try:
                    # 'resp', not 're', to avoid shadowing the regex module name.
                    resp = requests.get(url=target, headers=self.headers, timeout=1)
                    if resp.status_code == 200 and 'define' in resp.content.decode():
                        print('[*]' + '\t' + target)
                        with open('../file/file.txt', 'a', encoding='utf-8') as f:
                            f.write(target + '\n')
                except Exception as e:
                    # Best-effort scan: log network/decoding errors and move on.
                    print(e)
            if q.empty():
                break

    # 方法集合
    def main(self):
        """Start one producer and one consumer process sharing a queue.

        Processes are started but deliberately not joined, matching the
        original fire-and-forget behaviour.
        """
        q = multiprocessing.Queue()
        p1 = multiprocessing.Process(target=self.read_local_file, args=(q,))
        p2 = multiprocessing.Process(target=self.request_url, args=(q,))

        p1.start()
        p2.start()


if __name__ == '__main__':
    # Launch ten independent producer/consumer process pairs, all driven
    # by the same checker instance.
    checker = Check_file()
    for _ in range(10):
        checker.main()

# 转载自 (originally published at): www.cnblogs.com/victorstudy/p/11425859.html