python 加速运算

测试代码运行的时间

import time

# Time a single statement. time.perf_counter() is the clock meant for
# measuring short durations; time.time() is wall-clock time and can be too
# coarse to resolve fast operations.
tic = time.perf_counter()
much_job = [x**2 for x in range(1, 1000000, 3)]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# sample output (varies by machine):
# used: 0.06204s

一、加速查找

1. 用set而非list

import time

# Compare a membership test on a list with the same test on a set built
# from identical data.
data = [i**2+1 for i in range(1000000)]
list_data = list(data)
set_data = set(data)
# normal: `in` on a list scans the elements one by one
tic = time.perf_counter()
s = 1098987 in list_data
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# speed up: `in` on a set is a hash lookup
tic = time.perf_counter()
ss = 1098987 in set_data
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# sample output recorded with time.time() (varies by machine):
# used: 0.00833s
# used: 0.00000s

2. 用dict而非两个list进行匹配查找

import time

# Compare looking up a paired value through two parallel lists with a
# direct lookup in a dict built from the same pairs.
list_a = [i*2-1 for i in range(1000000)]
list_b = [i**2 for i in list_a]
dict_ab = dict(zip(list_a, list_b))
# normal: find the position in list_a, then index list_b with it
tic = time.perf_counter()
a = list_b[list_a.index(876567)]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# speed up: one dict lookup (.get returns None when the key is missing,
# whereas list.index raises ValueError)
tic = time.perf_counter()
aa = dict_ab.get(876567)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# sample output recorded with time.time() (varies by machine):
# used: 0.00394s
# used: 0.00000s

二、加速循环,在循环体中避免重复计算,用循环机制代替递归函数

3. 用for而非while

import time

# Baseline: sum 1..100000 with a while loop that maintains its own counter.
tic = time.perf_counter()
s, i = 0, 0
while i<100000:
    i += 1
    s += i
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))


# speed up: the for loop lets range() drive the counter. The loop variable
# must not be incremented again inside the body, otherwise the loop would
# sum 2..100001 and no longer match the while version above.
tic = time.perf_counter()
s = 0
for i in range(1, 100001):
    s += i
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))

三、利用库函数进行加速

4. 用numba加速Python函数

import time

tic = time.time()


def my_power(x):
    """Return x raised to the second power."""
    squared = x ** 2
    return squared


def my_power_sum(n):
    """Return my_power(1) + my_power(2) + ... + my_power(n); 0 when n < 1."""
    return sum(my_power(k) for k in range(1, n + 1))


# Pure-Python baseline run for the numba comparison.
s = my_power_sum(1000000)
toc = time.time()
elapsed = toc - tic
print('used: {:.5f}s'.format(elapsed))

# speed up
from numba import jit
# nopython=True asks Numba to compile to native code and to raise an error
# instead of silently falling back to the slow object mode.
@jit(nopython=True)
def my_power(x):
    """Return x squared."""
    return (x**2)


@jit(nopython=True)
def my_power_sum(n):
    """Return 1**2 + 2**2 + ... + n**2 (0 when n < 1)."""
    s = 0
    for i in range(1, n+1):
        s = s + my_power(i)
    return s


# Numba compiles a function the first time it is called. Call it once
# untimed so the measurement below covers the compiled code only, not the
# compilation itself.
my_power_sum(1)
tic = time.perf_counter()
ss = my_power_sum(1000000)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# sample output recorded before the warm-up call was added
# (pure Python first, then numba including compile time):
# used: 0.25207s
# used: 0.09088s

如果代码使用numpy做数值运算,并且常常有很多循环,那么使用Numba就是一个很好的选择。

numba不适合字典型变量和一些非numpy的函数;尤其是numba不能解析pandas,调用了pandas的函数在运行时也就无法编译。

5. 用map加速Python函数

import time

# Baseline: build the list of squares with a list comprehension.
tic = time.perf_counter()
res = [x**2 for x in range(1, 1000000, 3)]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))

# speed up

# In Python 3, map() returns a lazy iterator: nothing is computed until it
# is consumed. Wrap it in list() inside the timed region so both variants
# do the same work and `res` holds the same list.
tic = time.perf_counter()
res = list(map(lambda x:x**2, range(1, 1000000, 3)))
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# NOTE: the figures recorded earlier (0.07043s vs 0.00244s) timed only the
# creation of the lazy map object, not the squaring itself; re-measure
# before drawing conclusions.

6. 用filter加速Python函数

import time

# Baseline: square every multiple of 7 in the range, via a comprehension.
tic = time.perf_counter()
res = [x**2 for x in range(1, 1000000, 3) if x%7==0]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))

# speed up

# filter() is lazy in Python 3, and on its own it only selects values. To
# compare like with like, the selected values are squared as well and the
# whole pipeline is consumed with list() inside the timed region.
tic = time.perf_counter()
res = list(map(lambda x:x**2, filter(lambda x:x%7==0, range(1, 1000000, 3))))
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# NOTE: the figures recorded earlier (0.02840s vs 0.00033s) timed only the
# creation of the lazy filter object; re-measure before drawing conclusions.


7. 用np.where加速if函数

import time

import numpy as np

# Input shared by both variants: the integers -100000 .. 99999.
array_a = np.arange(-100000, 100000)


def _scalar_relu(x):
    """Return x when it is positive, otherwise 0."""
    return x if x > 0 else 0


# Baseline: apply the scalar rule element by element through np.vectorize.
tic = time.time()
relu = np.vectorize(_scalar_relu)
arr = relu(array_a)
toc = time.time()
elapsed = toc - tic
print('used: {:.5f}s'.format(elapsed))

# speed up


def relu(x):
    """Return an array holding x where x > 0 and 0 elsewhere."""
    positive = x > 0
    return np.where(positive, x, 0)


tic = time.time()
arrr = relu(array_a)
toc = time.time()
elapsed = toc - tic
print('used: {:.5f}s'.format(elapsed))
# used: 0.02531s
# used: 0.00083s

8. 多线程thread加速

import time

import numpy as np


tic = time.time()


def writefile(i):
    """Write 'hello <i>' on 10,000,000 lines to the file '<i>.txt'."""
    target = str(i)+'.txt'
    with open(target, 'w') as out:
        payload = ('hello %d\n'%i) * 10000000
        out.write(payload)


# Serial baseline: write files 40.txt .. 49.txt one after another.
for i in range(40, 50):
    writefile(i)

toc = time.time()
elapsed = toc - tic
print('used: {:.5f}s'.format(elapsed))

# speed up
import threading

tic = time.perf_counter()


def writefile(i):
    """Write 'hello <i>' on 10,000,000 lines to the file '<i>.txt'."""
    with open(str(i)+'.txt', 'w') as f:
        s = ('hello %d\n'%i) * 10000000
        f.write(s)


# One thread per file (10.txt .. 19.txt). Thread.setDaemon() is deprecated
# since Python 3.10, so the daemon flag is passed to the constructor.
thread_list = [
    threading.Thread(target=writefile, args=(i, ), daemon=True)
    for i in range(10, 20)
]

# Start every thread first, then wait for all of them, so the writes can
# overlap instead of running back to back.
for t in thread_list:
    t.start()
for t in thread_list:
    t.join()

toc = time.perf_counter()
print('used: {:.5f}s'.format(toc-tic))
# sample output (serial first, then threaded; varies by machine):
# used: 1.25816s
# used: 1.21268s

9. 多进程multiprocessing加速

import time

import numpy as np


tic = time.time()


def muchjob(x):
    """Sleep for five seconds to mimic slow work, then return x squared."""
    time.sleep(5)
    squared = x ** 2
    return squared


# Serial baseline: the eight jobs run one after another.
ans = list(map(muchjob, range(8)))
toc = time.time()
elapsed = toc - tic
print('used: {:.5f}s'.format(elapsed))

# speed up
import multiprocessing

def muchjob(x, delay=5):
    """Mimic a slow task.

    Sleeps for `delay` seconds (5 by default) and then returns x squared.
    """
    time.sleep(delay)
    return x**2


# Creating a Pool at import time is unsafe with the "spawn" start method
# (the default on Windows and macOS): each worker re-imports this module
# and would try to build a pool of its own. The guard restricts pool
# creation to the process that runs this file as a script.
if __name__ == '__main__':
    tic = time.perf_counter()
    # The with-block terminates the pool on exit, so the results are
    # fetched inside it.
    with multiprocessing.Pool(processes=4) as pool:
        res = [pool.apply_async(muchjob, (i, )) for i in range(8)]
        # .get() waits for each job and re-raises any exception from the
        # worker, which would otherwise go unnoticed.
        ans = [r.get() for r in res]
    toc = time.perf_counter()
    print('used: {:.5f}s'.format(toc-tic))
# sample output (serial first, then a pool of 4 processes):
# used: 40.03782s
# used: 10.07268s

reference:

https://mp.weixin.qq.com/s?__biz=MzU5MDY5OTI5MA==&mid=2247484643&idx=1&sn=55ba87185102042bf8c4641e14573161&chksm=fe3b0a96c94c83802ef2a8c4f35fb2364bf732f7931739a3d09386fa244634720b271892fe5b&token=1679241518&lang=zh_CN&scene=21#wechat_redirect

发布了249 篇原创文章 · 获赞 198 · 访问量 47万+

猜你喜欢

转载自blog.csdn.net/qq_27009517/article/details/103805099