测试代码运行的时间
import time

# Basic pattern for timing a piece of code.
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() follows the wall clock and can
# have a much coarser resolution.
tic = time.perf_counter()
much_job = [x**2 for x in range(1, 1000000, 3)]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run:
# used: 0.06204s
一、加速查找
1. 用set而非list
import time

data = [i**2 + 1 for i in range(1000000)]
list_data = list(data)
set_data = set(data)

# normal: a membership test on a list compares against the elements one by one
tic = time.perf_counter()
s = 1098987 in list_data
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))

# speed up: a membership test on a set is a hash lookup
# perf_counter() is used because a single set lookup is far below the
# resolution that time.time() offers on some platforms.
tic = time.perf_counter()
ss = 1098987 in set_data
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run (measured with time.time()):
# used: 0.00833s
# used: 0.00000s
2. 用dict而非两个list进行匹配查找
import time

list_a = [i*2 - 1 for i in range(1000000)]
list_b = [i**2 for i in list_a]
dict_ab = dict(zip(list_a, list_b))

# normal: find the position in one list, then index the other list
tic = time.perf_counter()
a = list_b[list_a.index(876567)]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))

# speed up: a single dict lookup (get() already returns None for a missing key)
tic = time.perf_counter()
aa = dict_ab.get(876567)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run (measured with time.time()):
# used: 0.00394s
# used: 0.00000s
二、加速循环,在循环体中避免重复计算,用循环机制代替递归函数
3. 用for而非while
import time

# normal: while loop with a manually maintained counter; sums 1..100000
tic = time.perf_counter()
s, i = 0, 0
while i < 100000:
    i += 1
    s += i
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))

# speed up: let range() drive the loop.
# The counter must not be incremented by hand here: range() already yields
# 1..100000, and an extra "i += 1" would make this loop sum 2..100001,
# i.e. compute something different from the while loop above.
tic = time.perf_counter()
s = 0
for i in range(1, 100001):
    s += i
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
三、利用库函数进行加速
4. 用numba加速Python函数
import time


def my_power(x):
    """Return the square of ``x``."""
    return x**2


def my_power_sum(n):
    """Return 1**2 + 2**2 + ... + n**2 using a plain Python loop.

    Returns 0 when ``n`` is less than 1, because the loop body never runs.
    """
    s = 0
    for i in range(1, n + 1):
        s = s + my_power(i)
    return s


# normal: time only the call itself, not the function definitions
tic = time.perf_counter()
s = my_power_sum(1000000)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# speed up
from numba import jit


# nopython=True asks numba to compile without falling back to the Python
# object model; compilation fails loudly if that is not possible.
@jit(nopython=True)
def my_power(x):
    """Return the square of ``x`` (numba-compiled)."""
    return x**2


@jit(nopython=True)
def my_power_sum(n):
    """Return 1**2 + 2**2 + ... + n**2 (numba-compiled loop)."""
    s = 0
    for i in range(1, n + 1):
        s = s + my_power(i)
    return s


# numba compiles a function the first time it is called with a given argument
# type. Do one untimed warm-up call so the measurement below covers only the
# execution of the compiled code, not the compilation.
my_power_sum(1)

tic = time.perf_counter()
ss = my_power_sum(1000000)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run (pure Python first, then numba; the
# numba figure was taken with compilation inside the timed region):
# used: 0.25207s
# used: 0.09088s
如果代码使用numpy做数值运算,并且常常有很多的循环,那么使用Numba就是一个很好的选择。
numba不适合处理字典型变量和一些非numpy的函数;尤其是numba不能解析pandas,含有pandas操作的函数在运行时也就无法编译。
5. 用map加速Python函数
import time

# normal: list comprehension
tic = time.perf_counter()
res = [x**2 for x in range(1, 1000000, 3)]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))

# speed up: map()
# In Python 3 map() returns a lazy iterator: nothing is computed until it is
# consumed. Wrap it in list() so the timed region does the same work as the
# list comprehension above and produces the same result.
tic = time.perf_counter()
res = list(map(lambda x: x**2, range(1, 1000000, 3)))
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run. The second figure was measured without
# consuming the map object, so it only reflects creating the iterator:
# used: 0.07043s
# used: 0.00244s
6. 用filter加速Python函数
import time

# normal: list comprehension with a condition
tic = time.perf_counter()
res = [x**2 for x in range(1, 1000000, 3) if x % 7 == 0]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))

# speed up: filter()
# In Python 3 filter() returns a lazy iterator, so it has to be consumed for
# any work to happen. The squaring step is kept as well, so that both
# variants compute the same list.
tic = time.perf_counter()
res = [x**2 for x in filter(lambda x: x % 7 == 0, range(1, 1000000, 3))]
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run. The second figure was measured without
# consuming the filter object and without squaring the values:
# used: 0.02840s
# used: 0.00033s
7. 用np.where加速if函数
import time

import numpy as np

array_a = np.arange(-100000, 100000)

# normal: np.vectorize calls the Python function once per element
relu = np.vectorize(lambda x: x if x > 0 else 0)
tic = time.perf_counter()
arr = relu(array_a)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))


# speed up: np.where evaluates the condition on the whole array at once
def relu(x):
    """Return ``x`` with every non-positive element replaced by 0."""
    return np.where(x > 0, x, 0)


tic = time.perf_counter()
arrr = relu(array_a)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run:
# used: 0.02531s
# used: 0.00083s
8. 多线程thread加速
import threading
import time

import numpy as np


def writefile(i):
    """Write the line 'hello <i>' 10,000,000 times to '<i>.txt'.

    The file is created (or overwritten) in the current working directory.
    """
    with open(str(i) + '.txt', 'w') as f:
        s = ('hello %d\n' % i) * 10000000
        f.write(s)


# normal: write ten files one after another
tic = time.perf_counter()
for i in range(40, 50, 1):
    writefile(i)
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))

# speed up: one thread per file
tic = time.perf_counter()
thread_list = []
for i in range(10, 20, 1):
    # The daemon flag is passed to the constructor; Thread.setDaemon() is
    # deprecated since Python 3.10.
    t = threading.Thread(target=writefile, args=(i,), daemon=True)
    thread_list.append(t)
for t in thread_list:
    t.start()
# Wait for every writer to finish before stopping the clock.
for t in thread_list:
    t.join()
toc = time.perf_counter()
print('used: {:.5f}s'.format(toc - tic))
# Sample output from the original run:
# used: 1.25816s
# used: 1.21268s
9. 多进程multiprocessing加速
import multiprocessing
import time

import numpy as np


def muchjob(x, delay=5):
    """Simulate a slow task: sleep ``delay`` seconds, then return ``x`` squared.

    Args:
        x: Number to square.
        delay: Seconds to sleep before returning; defaults to 5.
    """
    time.sleep(delay)
    return x**2


# The guard is required for multiprocessing: with the "spawn" start method
# each worker process imports this module again, and without the guard it
# would re-run the benchmark (and try to create its own pool).
if __name__ == '__main__':
    # normal: run the eight jobs one after another
    tic = time.perf_counter()
    ans = [muchjob(i) for i in range(8)]
    toc = time.perf_counter()
    print('used: {:.5f}s'.format(toc - tic))

    # speed up: distribute the jobs over a pool of four worker processes
    tic = time.perf_counter()
    pool = multiprocessing.Pool(processes=4)
    res = [pool.apply_async(muchjob, (i,)) for i in range(8)]
    pool.close()
    pool.join()
    # Fetch the return values; get() re-raises any exception that occurred
    # in a worker, which would otherwise go unnoticed.
    results = [r.get() for r in res]
    toc = time.perf_counter()
    print('used: {:.5f}s'.format(toc - tic))
    # Sample output from the original run:
    # used: 40.03782s
    # used: 10.07268s
reference: