Check whether each URL in a list serves a specified file

'''
1. Read the list of URLs from a local file
2. Request each URL one by one
3. Check whether the specified file under that URL contains the expected content
4. If it does, write the URL to a local file; if it does not, discard the URL
'''
import multiprocessing

import requests


class Check_file(object):
    """Probe a list of hosts for a file whose body contains a marker string.

    A reader process pushes every line of ``URL_LIST_PATH`` onto a queue,
    followed by a ``None`` sentinel. A worker process pops the entries,
    requests ``<url>/<suffix>`` for each suffix in ``SUFFIXES`` and appends
    every URL that answers 200 with ``MARKER`` in its body to ``RESULT_PATH``.
    """

    # Input file: one host or URL per line.
    URL_LIST_PATH = '../url/url.txt'
    # Output file: matching URLs are appended here, one per line.
    RESULT_PATH = '../file/file.txt'
    # File names appended to each base URL.
    SUFFIXES = ('index.php',)
    # Text that must occur in the response body for a URL to count as a hit.
    MARKER = 'define'

    def __init__(self):
        """Set the HTTP headers sent with every request."""
        self.headers = {
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        }

    # Read the local list and hand the URLs to the worker.
    def read_local_file(self, q):
        """Put every line of ``URL_LIST_PATH`` on ``q``, then a ``None`` sentinel.

        Lines are queued unmodified (including the trailing newline); the
        consumer strips them. The sentinel is queued even when reading fails,
        so that a consumer blocked in ``q.get()`` can always terminate.

        Args:
            q: Queue-like object exposing ``put``.

        Raises:
            OSError: If the URL list cannot be opened or read.
        """
        try:
            with open(self.URL_LIST_PATH, 'r', encoding='utf-8') as f:
                for line in f:
                    q.put(line)
        finally:
            q.put(None)
        print("read data finished")

    @staticmethod
    def _normalize_url(url):
        """Return ``url`` with a scheme and exactly one trailing slash, or None if blank."""
        url = url.strip()
        if not url:
            return None
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        return url.rstrip('/') + '/'

    def _check_url(self, re_url):
        """Request ``re_url``; record it and return True when it is a hit."""
        try:
            response = requests.get(url=re_url, headers=self.headers, timeout=1)
        except requests.RequestException as e:
            # Best effort: an unreachable host must not stop the scan.
            print(e)
            return False
        if response.status_code != 200:
            return False
        # Undecodable bytes are replaced rather than raising, so a page in
        # another encoding can still be searched for the ASCII marker.
        body = response.content.decode('utf-8', errors='replace')
        if self.MARKER not in body:
            return False
        print('[*]' + '\t' + re_url)
        try:
            with open(self.RESULT_PATH, 'a', encoding='utf-8') as f:
                f.write(re_url + '\n')
        except OSError as e:
            print(e)
        return True

    # Visit the URLs one by one and check whether the data is present.
    def request_url(self, q):
        """Consume URLs from ``q`` until the ``None`` sentinel arrives.

        Each queue entry is treated as ONE URL (not iterated character by
        character). Blank entries are skipped.

        Args:
            q: Queue-like object exposing a blocking ``get``. The producer
                must finish with ``None``; see ``read_local_file``.
        """
        while True:
            url = q.get()
            if url is None:
                break
            base_url = self._normalize_url(url)
            if base_url is None:
                continue
            for suffix in self.SUFFIXES:
                self._check_url(base_url + suffix)

    # Wire the methods together.
    def main(self):
        """Start one reader process and one worker process sharing a new queue.

        The processes are started but not joined here, so this method
        returns as soon as both have been launched.
        """
        q = multiprocessing.Queue()
        p1 = multiprocessing.Process(target=self.read_local_file, args=(q,))
        p2 = multiprocessing.Process(target=self.request_url, args=(q,))

        p1.start()
        p2.start()


if __name__ == '__main__':
    checker = Check_file()
    # Launch ten reader/worker pairs.
    # NOTE(review): every main() call creates its own queue, so the URL list
    # appears to be processed ten times rather than split ten ways - confirm
    # this is intended.
    for _ in range(10):
        checker.main()

Guess you like

Origin www.cnblogs.com/victorstudy/p/11425859.html