09. Python processes, threads, and coroutines

1. Processes, threads, and coroutines compared

A process is the unit of resource allocation.
A thread is the unit of scheduling by the operating system.
Switching between processes needs the most resources and is the least efficient.
Switching between threads needs a moderate amount of resources and has moderate efficiency (ignoring the GIL, of course).
Switching between coroutine tasks needs very few resources and is highly efficient.
Multiple processes and multiple threads may run in parallel, depending on the number of CPU cores; coroutines all run within a single thread, so they are concurrent but not parallel.

2. Process

2.1 Multitasking - process

import time
import multiprocessing


def test1():
    """Print the "1" marker line once per second, forever."""
    marker = "1--------"
    while True:
        print(marker)
        time.sleep(1)


def test2():
    """Print the "2" marker line once per second, forever."""
    marker = "2--------"
    while True:
        print(marker)
        time.sleep(1)


def main():
    """Run test1 and test2 concurrently, each in its own child process."""
    children = [multiprocessing.Process(target=job) for job in (test1, test2)]
    for child in children:
        child.start()


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.2. Getting the process pid

import multiprocessing
import os
import time


def test():
    """Report this process's pid and its parent's pid once per second, forever."""
    while True:
        own_pid, parent_pid = os.getpid(), os.getppid()
        print("----in 子进程 pid=%d ,父进程的pid=%d---" % (own_pid, parent_pid))
        time.sleep(1)


def main():
    """Print the main process's pid and parent pid, then start test() in a child."""
    own_pid, parent_pid = os.getpid(), os.getppid()
    print("----in 主进程 pid=%d---父进程pid=%d----" % (own_pid, parent_pid))
    child = multiprocessing.Process(target=test)
    child.start()


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.3. Passing parameters to a process

import multiprocessing
import os


def test(a, b, c, *args, **kwargs):
    """Print a, b, c, the extra positional tuple and the keyword dict, one per line."""
    for received in (a, b, c, args, kwargs):
        print(received)


def main():
    """Start test() in a child process, passing positional and keyword arguments."""
    own_pid, parent_pid = os.getpid(), os.getppid()
    print("----in 主进程 pid=%d---父进程pid=%d----" % (own_pid, parent_pid))

    # args supplies the positional arguments, kwargs the keyword arguments.
    positional = (11, 22, 33, 44, 55, 66, 77, 88)
    keyword = {"mm": 11}
    child = multiprocessing.Process(target=test, args=positional, kwargs=keyword)
    child.start()


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.4. Global variables are not shared between processes

import multiprocessing
import os
import time

# Module-level list that the two child processes below read and modify.
nums = [11, 22, 33]


def test():
    """Append 44 to nums, print the list, then stay alive for three seconds."""
    nums.append(44)
    shown = str(nums)
    print("在进程中1中nums=%s" % shown)
    time.sleep(3)


def test2():
    """Print nums as seen from the process this function runs in."""
    shown = str(nums)
    print("在进程中2中nums=%s" % shown)


def main():
    """Run test() to completion in one child, then run test2() in a second child."""
    print("----in 主进程 pid=%d---父进程pid=%d----" % (os.getpid(), os.getppid()))

    # First child appends to nums; wait until it has exited.
    writer = multiprocessing.Process(target=test)
    writer.start()
    writer.join()

    # Second child is started only afterwards and prints the nums it sees.
    reader = multiprocessing.Process(target=test2)
    reader.start()


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.5. Sharing data between processes through a Queue

import multiprocessing

"""
一个进程向Queue中写入数据,另外一个进程从Queue中获取数据,
通过Queue完成了 多个需要配合的进程间的数据共享,从而能够 起到 解耦的作用
"""


# End-of-data marker: the producer puts it on the queue after the last item
# and the consumer stops reading when it receives it.
_DOWNLOAD_DONE = None


def download_from_web(q):
    """Put the (simulated) downloaded data on the queue, followed by an end marker.

    Args:
        q: Queue-like object with a ``put`` method.
    """
    # Simulated payload standing in for data downloaded from the web.
    data = [11, 22, 33, 44]

    for item in data:
        q.put(item)

    # Signal the consumer explicitly that nothing more will arrive.
    q.put(_DOWNLOAD_DONE)

    print("---下载器已经下载完了数据并且存入到队列中----")


def analysis_data(q):
    """Collect items from the queue until the end marker arrives, then print them.

    The previous implementation stopped as soon as ``q.empty()`` was true.
    That races with the producer: the queue is also empty whenever the
    consumer is merely faster than the producer, so data could be cut short.

    Args:
        q: Queue-like object with a blocking ``get`` method.
    """
    # iter(callable, sentinel) keeps calling the blocking q.get() until it
    # returns the end marker.
    waitting_analysis_data = list(iter(q.get, _DOWNLOAD_DONE))

    # Simulated data processing.
    print(waitting_analysis_data)


def main():
    """Connect a downloader process and an analyser process through one queue."""
    # The queue is handed to both children as their only argument.
    shared_queue = multiprocessing.Queue()

    consumer = multiprocessing.Process(target=analysis_data, args=(shared_queue,))
    producer = multiprocessing.Process(target=download_from_web, args=(shared_queue,))

    # The consumer is started first, then the producer.
    consumer.start()
    producer.start()


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.6. Process pool

# -*- coding:utf-8 -*-
import os
import random
import time
from multiprocessing import Pool


def worker(msg):
    """Announce the task, sleep for a random time below two seconds, report the duration."""
    started = time.time()
    print("%s开始执行,进程号为%d" % (msg, os.getpid()))
    # random.random() returns a float between 0 and 1.
    pause = random.random() * 2
    time.sleep(pause)
    elapsed = time.time() - started
    print(msg, "执行完毕,耗时%0.2f" % elapsed)


if __name__ == '__main__':
    # Process pool limited to three worker processes.
    po = Pool(3)
    # apply_async(target, (arguments tuple,)): each of the ten tasks is run by
    # whichever worker process becomes idle.
    for i in range(10):
        po.apply_async(worker, (i,))

    print("----start----")
    # After close() the pool accepts no new tasks.
    po.close()
    # Wait for every worker to finish; join() must come after close().
    po.join()
    print("-----end-----")

2.7. Multitasking folder copy (v2) with progress display

import os
import multiprocessing


def copy_file(q, file_name, old_folder_name, new_folder_name):
    """Copy one file from the old folder to the new folder and report it on the queue.

    Args:
        q: Queue-like object; ``file_name`` is put on it once this file has
            been processed.
        file_name: Name of the file inside ``old_folder_name``.
        old_folder_name: Folder to read the file from.
        new_folder_name: Existing folder to write the copy into.

    Raises:
        OSError: If the source cannot be read or the destination cannot be written.
    """
    src_path = os.path.join(old_folder_name, file_name)
    dst_path = os.path.join(new_folder_name, file_name)
    try:
        # The with-statements close the handles even when read/write fails.
        with open(src_path, "rb") as old_f:
            content = old_f.read()
        with open(dst_path, "wb") as new_f:
            new_f.write(content)
    finally:
        # Always report the file as processed. A consumer that counts one
        # message per submitted file would otherwise block forever when a
        # copy fails; the exception itself still propagates to the caller.
        q.put(file_name)


def main():
    """Copy the files of a user-chosen folder into "<folder>[复件]" with a process pool.

    Progress is printed as a percentage that is updated in place as the
    workers report finished files through a queue.
    """
    # 1. Ask for the name of the folder to copy.
    old_folder_name = input("请输入要copy的文件夹的名字:")

    # 2. Create the destination folder. Only "already exists" is tolerated;
    #    any other failure (e.g. missing permissions) is reported right away
    #    instead of being silently swallowed.
    new_folder_name = old_folder_name + "[复件]"
    try:
        os.mkdir(new_folder_name)
    except FileExistsError:
        pass

    # 3. Collect the names of the files to copy. Sub-directories are skipped
    #    because copy_file opens every entry as a regular file.
    file_names = [
        name for name in os.listdir(old_folder_name)
        if os.path.isfile(os.path.join(old_folder_name, name))
    ]

    # 4. Create the process pool.
    po = multiprocessing.Pool(5)

    # 5. Create the queue through a Manager: a plain multiprocessing.Queue
    #    cannot be passed to pool workers as a task argument.
    q = multiprocessing.Manager().Queue()

    # 6. Submit one copy task per file.
    for file_name in file_names:
        po.apply_async(copy_file, args=(q, file_name, old_folder_name, new_folder_name))

    po.close()

    all_file_num = len(file_names)
    copy_ok_num = 0
    # Checking the counter before q.get() lets an empty folder finish
    # immediately instead of blocking forever on a queue nobody writes to.
    while copy_ok_num < all_file_num:
        q.get()
        copy_ok_num += 1
        # The line has no trailing newline, so it must be flushed explicitly
        # to become visible; "\r" makes the next update overwrite it.
        print("\r拷贝的进度为:%.2f %%" % (copy_ok_num * 100 / all_file_num), end="", flush=True)

    # Finish the progress line and wait for the worker processes to exit.
    print()
    po.join()


# Launch the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()

2.8 Understanding more about the queue

  • https://blog.csdn.net/u013713010/article/details/53325438

3. Thread

3.1 Multitasking - Thread

import time
import threading


def sing():
    """Sing for five seconds: print the singing line five times, one second apart."""
    remaining = 5
    while remaining:
        print("----正在唱:菊花茶----")
        time.sleep(1)
        remaining -= 1


def dance():
    """Dance for five seconds: print the dancing line five times, one second apart."""
    remaining = 5
    while remaining:
        print("----正在跳舞----")
        time.sleep(1)
        remaining -= 1


def main():
    """Run sing() and dance() at the same time, each in its own thread."""
    performers = [threading.Thread(target=act) for act in (sing, dance)]
    for performer in performers:
        performer.start()


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

3.2. Threads share global variables

import threading
import time


def test1(temp):
    """Append 33 to the list passed in (modifying it in place) and print it."""
    temp.append(33)
    shown = str(temp)
    print("-----in test1 temp=%s----" % shown)


def test2(temp):
    """Print the list passed in without modifying it."""
    shown = str(temp)
    print("-----in test2 temp=%s----" % shown)


# Module-level list handed to both threads below.
g_nums = [11, 22]


def main():
    """Let one thread append to g_nums, then show it from a second thread and from main."""
    # target: the function the thread will execute.
    # args: the tuple of arguments passed to that function.
    writer = threading.Thread(target=test1, args=(g_nums,))
    reader = threading.Thread(target=test2, args=(g_nums,))

    # One-second pauses keep the three printouts in a fixed order.
    writer.start()
    time.sleep(1)

    reader.start()
    time.sleep(1)

    print("-----in main Thread g_nums = %s---" % str(g_nums))


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

3.3. The problem with shared global variables: race conditions

import threading
import time

# Global counter that both threads increment without any locking.
g_num = 0


def test1(num):
    """Increment the global g_num ``num`` times, then print its current value.

    No lock is taken around the increment; this section uses that to show
    the race condition between test1 and test2.

    Args:
        num: Number of increments to perform.
    """
    global g_num
    for i in range(num):
        # Unsynchronised read-modify-write of the shared counter.
        g_num += 1
    print("-----in test1 g_num=%d----" % g_num)


def test2(num):
    """Increment the global g_num ``num`` times, then print its current value.

    Like test1, no lock is taken around the increment.

    Args:
        num: Number of increments to perform.
    """
    global g_num
    for i in range(num):
        # Unsynchronised read-modify-write of the shared counter.
        g_num += 1
    print("-----in test2 g_num=%d=----" % g_num)


def main():
    """Run test1 and test2 in parallel threads and print g_num once both are done."""
    t1 = threading.Thread(target=test1, args=(1000000,))
    t2 = threading.Thread(target=test2, args=(1000000,))

    t1.start()
    t2.start()

    # Wait until both threads have really finished. A fixed sleep only
    # guesses how long they need: too short prints an intermediate value,
    # too long wastes time.
    t1.join()
    t2.join()

    print("-----in main Thread g_num = %d---" % g_num)


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

3.4. Using a mutex to solve the race condition (1)

import threading
import time

# Global counter shared by both threads.
g_num = 0


def test1(num):
    """Increment the global g_num ``num`` times while holding ``mutex``, then print it.

    Args:
        num: Number of increments to perform.
    """
    global g_num
    # Entering the block acquires the lock, blocking while another thread
    # holds it. Leaving the block releases it again, even if the body raises.
    with mutex:
        for _ in range(num):
            g_num += 1
    print("-----in test1 g_num=%d----" % g_num)


def test2(num):
    """Increment the global g_num ``num`` times while holding ``mutex``, then print it.

    Args:
        num: Number of increments to perform.
    """
    global g_num
    # Entering the block acquires the lock, blocking while another thread
    # holds it. Leaving the block releases it again, even if the body raises.
    with mutex:
        for _ in range(num):
            g_num += 1
    print("-----in test2 g_num=%d=----" % g_num)


# Mutex guarding g_num; a newly created Lock is unlocked.
mutex = threading.Lock()


def main():
    """Run test1 and test2 in parallel threads and print g_num once both are done."""
    t1 = threading.Thread(target=test1, args=(1000000,))
    t2 = threading.Thread(target=test2, args=(1000000,))

    t1.start()
    t2.start()

    # Wait until both threads have really finished. A fixed sleep only
    # guesses how long they need and may print an intermediate value.
    t1.join()
    t2.join()

    print("-----in main Thread g_num = %d---" % g_num)


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()


3.5. Multitasking UDP chat

import socket
import threading


def recv_msg(udp_socket):
    """Receive data on the socket forever and print every recvfrom() result.

    Args:
        udp_socket: Socket-like object with a ``recvfrom`` method.
    """
    while True:
        # Reads with a 1024-byte buffer; the returned value is printed as is.
        print(udp_socket.recvfrom(1024))


def send_msg(udp_socket, dest_ip, dest_port):
    """Read lines from the user forever and send each one, UTF-8 encoded, to the peer.

    Args:
        udp_socket: Socket-like object with a ``sendto`` method.
        dest_ip: IP address of the peer.
        dest_port: Port of the peer.
    """
    destination = (dest_ip, dest_port)
    while True:
        text = input("输入要发送的数据:")
        payload = text.encode("utf-8")
        udp_socket.sendto(payload, destination)


def main():
    """Set up the UDP chat: create the socket, ask for the peer, start both threads."""
    # 1. Create the UDP socket and 2. bind it to local port 7890.
    udp_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    udp_socket.bind(("", 7890))

    # 3. Ask for the peer's address.
    dest_ip = input("请输入对方的ip:")
    dest_port = int(input("请输入对方的port:"))

    # 4. One thread receives, the other sends, both on the same socket.
    receiver = threading.Thread(target=recv_msg, args=(udp_socket,))
    sender = threading.Thread(target=send_msg, args=(udp_socket, dest_ip, dest_port))
    for chat_thread in (receiver, sender):
        chat_thread.start()


# Launch the chat only when this file is executed as a script.
if __name__ == "__main__":
    main()


3.6. Multithreading in Python compared with Java

Outline

	在一个进程中,我们同时开启多个线程,让多个线程同时去完成某些任务(功能)。多线程的目的:
	提高程序的运行效率。

Multithreading principle

	cpu在线程中做时间片的切换。
	其实真正电脑中的程序的运行不是同时在运行的。CPU负责程序的运行,而CPU在运行程序的过程中某个时刻点上,
	它其实只能运行一个程序。而不是多个程序。而CPU它可以在多个程序之间进行高速的切换。而切换频率和速度太
	快,导致人的肉眼看不到。每个程序就是进程, 而每个进程中会有多个线程,而CPU是在这些线程之间进行切换。
	了解了CPU对一个任务的执行过程,我们就必须知道,多线程可以提高程序的运行效率,但不能无限制的开线程。

Python's pseudo-multithreading: note the difference from Java (*****)

	Python中的多线程没有真正实现并行的多线程! 为什么这么说,我们先了解一个概念:全局解释器锁(GIL)。
	Python代码的执行由Python虚拟机(解释器)来控制。
	Python在设计之初就考虑要在主循环中,同时只有一个线程在执行,就像单CPU的系统中运行多个进程那样,内存
	中可以存放多个程序,但任意时刻,只有一个程序在CPU中运行。
	同样地,虽然Python解释器可以运行多个线程,但在任意时刻,只有一个线程在解释器中运行。

	对Python虚拟机的访问由全局解释器锁(GIL)来控制,正是这个锁能保证同时只有一个线程在运行。在多线程环
	境中,Python虚拟机按照以下方式执行。
    	1.设置GIL。
    	2.切换到一个线程去执行。
    	3.运行。
    	4.把线程设置为睡眠状态。
    	5.解锁GIL。
    	6.再次重复以上步骤。
	 在 Python 2 中,python 每执行100个字节码,GIL锁就会解锁一次,让其它线程执行(Python 3.2 起改为按时间间隔切换,
	 默认 5 毫秒,可用 sys.getswitchinterval() 查看);所以,python多线程环境,是交替执行,上下文切换,并没有同一时刻执行代码.

4. coroutine

4.1 Using yield to complete multiple tasks (coroutines)

import time


def task_1():
    """Generator task: on each resumption print the task-1 marker, pause 0.1 s, yield 1."""
    marker, result = "---1----", 1
    while True:
        print(marker)
        time.sleep(0.1)
        yield result


def task_2():
    """Generator task: on each resumption print the task-2 marker, pause 0.1 s, yield 2."""
    marker, result = "---2----", 2
    while True:
        print(marker)
        time.sleep(0.1)
        yield result


def main():
    """Alternate between the two generator tasks forever, printing what each yields."""
    t1 = task_1()
    t2 = task_2()
    # next() runs a task until its next yield and then returns here, so the
    # tasks take turns t1/t2/t1/t2...; this cooperative switching is the
    # coroutine-style multitasking shown in this section.
    while True:
        for task in (t1, t2):
            print(next(task))


# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

4.2. Using greenlet to complete multiple tasks (coroutines)

from greenlet import greenlet
import time


def test1():
    """Body of greenlet gr1: print "---A--", switch to gr2, sleep 0.5 s, repeat."""
    while True:
        print("---A--")
        # Hand control over to gr2; the sleep below runs after control is
        # switched back to this greenlet.
        gr2.switch()
        time.sleep(0.5)


def test2():
    """Body of greenlet gr2: print "---B--", switch to gr1, sleep 0.5 s, repeat."""
    while True:
        print("---B--")
        # Hand control back to gr1; the sleep below runs after control is
        # switched back to this greenlet.
        gr1.switch()
        time.sleep(0.5)


# Wrap the two task functions in greenlets; nothing runs until a switch().
gr1 = greenlet(test1)
gr2 = greenlet(test2)

# Switch into gr1 to start running it.
gr1.switch()

4.3. Gevent multitasking (coroutines)

import gevent
import time


def f1(n):
    """Print the current greenlet and a counter ``n`` times, pausing 0.5 s after each."""
    for step in range(n):
        current = gevent.getcurrent()
        print(current, step)
        # gevent.sleep is used here instead of time.sleep(0.5).
        gevent.sleep(0.5)


def f2(n):
    """Print the current greenlet and a counter ``n`` times, pausing 0.5 s after each."""
    for step in range(n):
        current = gevent.getcurrent()
        print(current, step)
        # gevent.sleep is used here instead of time.sleep(0.5).
        gevent.sleep(0.5)


def f3(n):
    """Print the current greenlet and a counter ``n`` times, pausing 0.5 s after each."""
    for step in range(n):
        current = gevent.getcurrent()
        print(current, step)
        # gevent.sleep is used here instead of time.sleep(0.5).
        gevent.sleep(0.5)


# Spawn the three greenlets, printing a marker before each step.
print("----1---")
g1 = gevent.spawn(f1, 5)
print("----2---")
g2 = gevent.spawn(f2, 5)
print("----3---")
g3 = gevent.spawn(f3, 5)
print("----4---")
# Wait until all three greenlets have finished.
gevent.joinall([g1, g2, g3])

4.4. Gevent patch

from gevent import monkey
import gevent
import random
import time

# Needed when the program performs time-consuming (blocking) operations:
# replace the blocking code the program uses with gevent's own implementations.
monkey.patch_all()


def coroutine_work(coroutine_name):
    """Print the coroutine name with a counter ten times, sleeping a random time below 1 s after each."""
    for step in range(10):
        print(coroutine_name, step)
        pause = random.random()
        time.sleep(pause)


# Start one greenlet per worker name and wait for both to finish.
workers = [gevent.spawn(coroutine_work, name) for name in ("work1", "work2")]
gevent.joinall(workers)

4.5. downloader

from gevent import monkey

# Patch before anything else is imported: urllib.request pulls in socket and
# ssl, and gevent warns that patching ssl after it has been imported may lead
# to errors. The downloads below use https URLs, so ssl is involved.
monkey.patch_all()

import urllib.request  # noqa: E402  (must be imported after patch_all)

import gevent  # noqa: E402


def downloader(img_name, img_url):
    """Download ``img_url`` and save the response body to the file ``img_name``.

    Args:
        img_name: Path of the file to write.
        img_url: URL to fetch.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the output file cannot be written.
    """
    # The with-statement closes the response as soon as the body has been
    # read, also when reading fails.
    with urllib.request.urlopen(img_url) as response:
        img_content = response.read()

    with open(img_name, "wb") as f:
        f.write(img_content)


def main():
    """Download the two images concurrently and wait until both are saved."""
    targets = (
        ("3.jpg",
         "https://rpic.douyucdn.cn/appCovers/2017/09/22/1760931_20170922133718_big.jpg"),
        ("4.jpg",
         "https://rpic.douyucdn.cn/appCovers/2017/09/17/2308890_20170917232900_big.jpg"),
    )
    # One greenlet per image; joinall returns once all of them are done.
    jobs = [gevent.spawn(downloader, name, url) for name, url in targets]
    gevent.joinall(jobs)


# Launch the downloads only when this file is executed as a script.
if __name__ == '__main__':
    main()

5. processes, threads and coroutines introduction and usage scenarios --Python

Published 85 original articles · won praise 12 · views 3752

Guess you like

Origin blog.csdn.net/fanjianhai/article/details/103642910