之前用 Python 写多进程用 multiprocessing 库,写多线程用 threading 库。Python 3 中 multiprocessing.dummy 提供的线程池比直接用 threading 写多线程简单得多,这里记录一下用法。
提示:多线程、多进程不是用来炫技的技术,而是工作中提升效率的选择。多数计算密集型任务用多进程,IO 密集型任务用多线程。
一、带参数的多线程用法
from multiprocessing.dummy import Pool
def test(data):
    """Worker callable for the thread-pool demo: print the item it is given."""
    print(data)
# ---------- start the thread pool ---------- #
domainlist = [1, 2, 3, 4]
pool = Pool(50)  # number of worker threads
# map(func, iterable): call func once per item of the iterable and wait for
# all of the calls to finish before returning the list of results.
result = pool.map(test, domainlist)
# Shut the pool down explicitly so its worker threads do not outlive the job.
pool.close()
pool.join()
# ---------- thread pool finished ---------- #
二、带参数的多进程使用
from multiprocessing import Pool
def test(data):
    """Worker callable for the process-pool demo: print the item it is given."""
    print(data)
# ---------- start the process pool ---------- #
# The __main__ guard is required for process pools: with the "spawn" start
# method each child re-imports this module, and unguarded pool creation would
# be executed again inside every child.
if __name__ == "__main__":
    domainlist = [1, 2, 3, 4]
    pool = Pool(processes=10)  # number of worker processes
    # map takes two arguments: the function to call, and the list of values
    # to pass to it one at a time.
    pool.map(test, domainlist)
    pool.close()
    pool.join()
# ---------- process pool finished ---------- #
三、使用实例:
例如:有大量 domain 需要判断是否存活,并且要尽快判断完成。下面分别用多线程和多进程两种写法实现。
demo1:用多线程实现 python3 mthreat.py
import time,json,requests
#import threading
from multiprocessing.dummy import Pool
def req(domain):
    """Probe ``https://<domain>`` and report the HTTP status code.

    Args:
        domain: Host name without a scheme, e.g. ``"example.com"``.

    Returns:
        The status code of the first response, or the string ``'noserver'``
        when the request fails for any reason.
    """
    try:
        url = 'https://' + domain
        # requests follows redirects by default, which would replace a 301/302
        # with the status of the final page. Redirects are disabled so that
        # callers comparing the result against 301/302 can actually see them.
        r = requests.get(url, timeout=2, allow_redirects=False)
        return r.status_code
    except Exception:
        # Deliberately best-effort: every failure is reported as "no server"
        # instead of aborting the whole batch.
        return 'noserver'
def writefile(filename, data):
    """Append ``data`` as one line to the file ``filename``.

    Args:
        filename: Path of the log file; it is created if it does not exist.
        data: Value to record. It is written with ``print``, so it is
            converted with ``str()`` and followed by a newline.
    """
    # The with-block closes the handle even when the write raises; append-only
    # mode is enough because the file is never read back here.
    with open(filename, 'a', encoding='utf-8') as wf:
        print(data, file=wf)
def checkhttp(domain):
    """Classify one domain by the result of ``req`` and record the outcome.

    * ``200``: print a notice and append the domain to ``alive.log``.
    * ``301`` / ``302``: print a redirect notice only.
    * ``'noserver'``: append the domain to ``notsever.log``.

    Any other result is ignored. Always returns ``None``.
    """
    status = req(domain)

    if status == 200:
        print(domain + "-----is alive ")
        writefile('alive.log', domain)
        return

    if status in (301, 302):
        print('30x跳转:' + domain)
        return

    if status == 'noserver':
        writefile('notsever.log', domain)
if __name__ == "__main__":
    # Script entry point: read the domain list, check every domain with a
    # pool of 10 threads, and print wall-clock start and end times.
    print('start_time:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    f1 = "/root/test/alldomain.txt"
    with open(f1, 'r') as f:
        # The first whitespace-separated field of each line is the domain.
        # Blank lines are skipped: split()[0] on them would raise IndexError.
        domainlist = [line.split()[0] for line in f if line.strip()]
    # ---------- start the thread pool ---------- #
    pool = Pool(10)  # number of worker threads
    try:
        # map(func, iterable): call func once per item of the iterable.
        result = pool.map(checkhttp, domainlist)
    finally:
        # Release the worker threads whether or not a check raised.
        pool.close()
        pool.join()
    print('end_time:' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
启动后验证是否启动成功:top,pstree命令都可 (多线程启动10个线程,实际存在13个,3个是主进程调度使用的线程)
demo2:用多进程实现 python3 mportest.py
import time,json,requests
from multiprocessing import Pool
def req(domain, timeout=2):
    """Probe ``https://<domain>`` and report the HTTP status code.

    Args:
        domain: Host name without a scheme, e.g. ``"example.com"``.
        timeout: Seconds to wait for the server, passed on to ``requests.get``.

    Returns:
        The status code of the first response, or the string
        ``'nohttpserver'`` when the request fails for any reason.
    """
    try:
        url = 'https://' + domain
        # timeout must be passed explicitly; without it requests waits
        # indefinitely and one dead host can stall a worker forever.
        # Redirects are disabled because requests follows them by default,
        # which would hide the 301/302 codes that callers test for.
        r = requests.get(url, timeout=timeout, allow_redirects=False)
        print("请求域名" + domain + "返回码:" + str(r.status_code))
        return r.status_code
    except Exception:
        # Deliberately best-effort: every failure is reported as "no HTTP
        # server" instead of aborting the whole batch.
        return 'nohttpserver'
def writefile(filename, data):
    """Append ``data`` as one line to the file ``filename``.

    Args:
        filename: Path of the log file; it is created if it does not exist.
        data: Value to record. It is written with ``print``, so it is
            converted with ``str()`` and followed by a newline.
    """
    # The with-block guarantees the handle is closed even if the write fails;
    # plain append mode suffices since nothing is read from the file.
    with open(filename, 'a', encoding='utf-8') as wf:
        print(data, file=wf)
def checkhttp(domain):
    """Classify one domain by the result of ``req`` and record the outcome.

    * ``200``: append the domain to ``Ishttpserver.log``.
    * ``301`` / ``302``: print a redirect notice only.
    * ``'nohttpserver'``: append the domain to ``Nserver.log``.

    Any other result is ignored. Always returns ``None``.
    """
    outcome = req(domain)

    if outcome == 200:
        writefile('Ishttpserver.log', domain)
        return

    if outcome in (301, 302):
        # Redirecting hosts still need a second check later.
        print('30x跳转:' + domain)
        return

    if outcome == 'nohttpserver':
        writefile('Nserver.log', domain)
if __name__ == "__main__":
    # Script entry point: read the domain list and check every domain with a
    # pool of 10 worker processes.
    f1 = "/root/test/alldomain.txt"
    with open(f1, 'r') as f:
        # The first whitespace-separated field of each line is the domain.
        # Blank lines are skipped: split()[0] on them would raise IndexError.
        domainlist = [line.split()[0] for line in f if line.strip()]
    # ---------- start the process pool ---------- #
    pool = Pool(processes=10)  # number of worker processes
    try:
        # map takes two arguments: the function to call, and the list of
        # values to pass to it one at a time.
        pool.map(checkhttp, domainlist)
    finally:
        # Always release the worker processes, even if a check raised.
        pool.close()
        pool.join()
    # ---------- process pool finished ---------- #