Python 并行任务技巧

多线程的参考文章：http://www.oschina.net/translate/python-parallelism-in-one-line

参考了这位大牛的并发技巧，水平太高了。然后对现有的任务进行了升级改造，之前是用 threading + Queue 实现的。

任务时间对比

单个执行

Wed Sep 20 17:54:26 2017 Wed Sep 20 17:55:51 2017 85秒

2个

Wed Sep 20 17:56:25 2017 Wed Sep 20 17:57:05 2017 40秒

4个
Wed Sep 20 17:57:51 2017 Wed Sep 20 17:58:15 2017 24秒

8个
Wed Sep 20 17:58:54 2017 Wed Sep 20 17:59:08 2017 14秒

#coding:utf-8
'''
Created on 2017年9月20日

@author: zhouxuan
@note: 使用multiprocessing.dummy的线程池(ThreadPool)并行执行任务（之前是用threading queue来进行任务调度）
'''

import urllib2
from multiprocessing.dummy import Pool as ThreadPool
import time
import re
import requests
def get_url_and_merchantid(filename):
    """Parse the first line of *filename* into a ``{url: merchant_id}`` dict.

    Only the first line of the file is read.  It is split on ``*`` and
    pieces of five characters or fewer are discarded.  Every remaining
    entry is matched against the pattern
    ``(.*[a-z].*.com)\\(\\S*:(\\d*)\\)``: the first group becomes the url
    (key) and the digits after the colon become the merchant id (value).

    Entries that do not match the pattern are handled separately: the text
    before the first ``(`` is used as the url and the slice ``[-8:-2]`` of
    the entry as the merchant id.  They are stored after the regular
    entries, so they win when the same url appears in both forms.

    :param filename: path of the text file that lists the merchants.
    :returns: dict mapping url to merchant id (both strings).
    :raises IOError: (``OSError`` on Python 3) if the file cannot be opened.
    """
    # Raw string: the previous plain literal only worked because Python
    # passes unknown escapes such as "\(" and "\d" through unchanged, which
    # newer interpreters warn about.  The pattern text itself is unchanged.
    pattern = re.compile(r"(.*[a-z].*.com)\(\S*:(\d*)\)")

    with open(filename, 'r') as handle:
        first_line = handle.readline()

    # Very short pieces are treated as noise between the '*' separators.
    entries = [piece for piece in first_line.split('*') if len(piece) > 5]

    user = {}  # url -> merchant_id
    spec_merchant = []  # entries whose format does not match the pattern
    for entry in entries:
        found = pattern.findall(entry)
        if found:
            url, merchant_id = found[0]
            user[url] = merchant_id
        else:
            spec_merchant.append(entry)

    # Irregular entries are processed last so that they override regular ones.
    for entry in spec_merchant:
        url = entry.split('(')[0]
        user[url] = entry[-8:-2]  # fixed-position slice holding the merchant id
    return user
def method(urlss):
    """Log in, switch to one merchant and check that its home page loads.

    :param urlss: task string of the form ``"<url>&<merchant_id>"``; spaces
        inside the url part are removed.
    :returns: dict ``{url: merchant_id}`` containing this merchant when its
        home page could not be fetched with HTTP 200 in three attempts,
        otherwise an empty dict (also for merchants on the exclusion list).
    :raises IndexError: if *urlss* contains no ``&``.
    """
    parts = urlss.split('&')
    sys_url = parts[0].replace(' ', '')
    sys_merchant_id = parts[1]
    fail_url = {}

    # Merchants to skip, e.g. locally deployed ones or ones no longer in use.
    merchant_failure = ['004872', '000001', '000007',
                        '007849', '003916', '004189', '005898',
                        '009356', '000690']
    if sys_merchant_id in merchant_failure:
        # Nothing to check, so do not spend a login request on it.
        return fail_url

    s = requests.session()
    url_login = "https://passport.tongtool.com/check"  # merchant selection page
    datas = {"username":"****",
            "password":"*****"}
    headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0',
            'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            }
    # NOTE(review): verify=False disables TLS certificate verification on
    # every request below; kept as in the original, confirm it is intended.
    s.post(url_login, data=datas, headers=headers, verify=False)  # log in

    url1 = "https://passport.tongtool.com/adminSwitch?m=%s&d=%s"\
    %(sys_merchant_id,sys_url)
    # Single pre-formatted argument prints the same text on Python 2 and 3.
    print("%s %s %s" % (url1, sys_merchant_id, sys_url))
    s.get(url1, headers=headers, verify=False)  # switch to the merchant

    url2 = "http://%s/dashboard/homepage/index.htm" %sys_url  # home page
    # The network may be flaky, so try up to three times.  The merchant is
    # recorded as failed only when no attempt succeeds (for/else), not on
    # the first bad response.
    for _ in range(3):
        try:
            home_page1 = s.post(url2, headers=headers, verify=False)
        except requests.RequestException:
            # A connection-level error counts as one failed attempt.
            continue
        if home_page1.status_code == 200:  # got the merchant's home page
            break
    else:
        fail_url[sys_url] = sys_merchant_id
    return fail_url
# Driver: read the merchants, check them all with a pool of 8 threads and
# print the wall-clock start and end times of the run.
users = get_url_and_merchantid("urls.txt")
# One "<url>&<merchant_id>" task string per merchant.  The list is no longer
# called `all`, which shadowed the builtin of the same name.
tasks = [url + '&' + merchant_id for url, merchant_id in users.items()]
started = time.ctime()
pool = ThreadPool(8)
result = pool.map(method, tasks)  # blocks until every task has finished
pool.close()
pool.join()
finished = time.ctime()
# Single pre-formatted argument prints the same text on Python 2 and 3.
print("%s %s" % (started, finished))

代码还没有优化，只做记录。目前实现了验证所有云服务器访问是否正常的功能。

Python 并行任务技巧

猜你喜欢