#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Process: the smallest unit of resource allocation.
# Thread: the smallest unit of execution scheduled on the CPU.
# Any code that runs on the CPU is executed by a thread.
# Threads are scheduled by the operating system, which is responsible for switching between them.
# Coroutine:
# 用户级别的,由我们自己写的python代码来控制切换的
# 是操作系统不可见的
# In the CPython interpreter, neither coroutines nor threads can use multiple cores; they take turns executing on a single CPU.
# 由于多线程本身就不能利用多核
# 所以即便是开启了多个线程也只能轮流在一个CPU上执行
# 协程如果把所有任务的IO操作都规避掉,只剩下需要使用CPU的操作
# 就意味着协程就可以做到提高CPU利用率的效果
# Multithreading vs. coroutines
# 线程 切换需要操作系统,开销大,操作系统不可控,给操作系统的压力大
# 操作系统对IO操作的感知更加灵敏
# 协程 切换需要python代码,开销小,用户操作可控,完全不会增加操作系统的压力
# 用户级别能够对IO操作的感知比较低
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Coroutine: the ability to switch back and forth between multiple tasks inside a single thread; each of those tasks is a coroutine.
# Two ways to implement the switching:
# 原生python完成 yield asyncio
# C语言完成的python模块 greenlet gevent
# greenlet
import time
from greenlet import greenlet
def eat():
    """Print a start message, pause, hand control to ``g2``, then finish.

    Relies on the module-level greenlet ``g2``. The final message is only
    printed once another greenlet switches back into this one.
    """
    print('wusir is eating')
    time.sleep(0.5)
    # Explicit cooperative switch: execution continues inside g2 from here.
    g2.switch()
    print('wusir finished eat')
def sleep():
    """Print a start message, pause, print a finish message, then switch to ``g1``.

    Relies on the module-level greenlet ``g1``; switching back lets the code
    after ``g2.switch()`` in ``eat`` continue.
    """
    print('Mr Ma is sleeping')
    time.sleep(0.5)
    print('capable as finished sleep')
    g1.switch()
# Wrap each function in its own greenlet; nothing runs until switch() is called.
g1, g2 = greenlet(eat), greenlet(sleep)
# Enter eat() first; from there the two greenlets switch between each other.
g1.switch()
# gevent module
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
# Show time.sleep as it is BEFORE gevent patches the standard library.
print('-->', time.sleep)
import gevent
from gevent import monkey
# Patch the standard library so blocking calls cooperate with gevent.
monkey.patch_all()
def eat():
    """Print a start message, sleep for one second, then print a finish message.

    Also prints the current ``time.sleep`` object so it can be compared with
    the one printed at module import time.
    """
    print('wusir is eating')
    print('in eat: ',time.sleep)
    time.sleep(1)
    print('wusir finished eat')
def sleep():
    """Print a start message, sleep for one second, then print a finish message."""
    print('Mr Ma is sleeping')
    time.sleep(1)
    print('capable as finished sleep')
# Schedule each function as its own gevent coroutine task.
g1, g2 = gevent.spawn(eat), gevent.spawn(sleep)
# Wait for both tasks; each join() blocks until that task has finished.
g1.join()
g2.join()
import time
import gevent
from gevent import monkey
# Patch the standard library so blocking calls cooperate with gevent.
monkey.patch_all()
def eat():
    """Print a start message, sleep for one second, then print a finish message."""
    print('wusir is eating')
    time.sleep(1)
    print('wusir finished eat')
def sleep():
    """Print a start message, sleep for one second, then print a finish message."""
    print('Mr Ma is sleeping')
    time.sleep(1)
    print('capable as finished sleep')
# g1 = gevent.spawn(eat)    # create a coroutine task
# g3 = gevent.spawn(eat)    # create a coroutine task
# g2 = gevent.spawn(sleep)  # create a coroutine task
# # g1.join()  # block until task g1 has completed
# # g2.join()  # block until task g2 has completed
# gevent.joinall([g1, g2, g3])
# Spawn ten eat() tasks and keep their handles so they can all be awaited.
g_l = [gevent.spawn(eat) for _ in range(10)]
# Block until every spawned task has finished.
gevent.joinall(g_l)
import time
import gevent
from gevent import monkey
# Patch the standard library so blocking calls cooperate with gevent.
monkey.patch_all()
def eat():
    """Print start/finish messages around a one-second sleep.

    Returns:
        str: The fixed string ``'wusir***'``.
    """
    print('wusir is eating')
    time.sleep(1)
    print('wusir finished eat')
    return 'wusir***'
def sleep():
    """Print start/finish messages around a one-second sleep.

    Returns:
        str: The fixed string ``'Brother Mark 666'``.
    """
    print('Mr Ma is sleeping')
    time.sleep(1)
    print('capable as finished sleep')
    return 'Brother Mark 666'
# Run both tasks as gevent coroutines and wait for them together.
g1, g2 = gevent.spawn(eat), gevent.spawn(sleep)
gevent.joinall([g1, g2])
# Once a task has finished, its function's return value is read from .value.
print(g1.value)
print(g2.value)
# asyncio module
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import asyncio
# Start a single task
async def demo():
    """Coroutine that prints 'start', waits one second, then prints 'end'."""
    print('start')
    # Suspends this coroutine for one second without blocking the event loop.
    await asyncio.sleep(1)
    print('end')
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated
# (and fails on recent Python versions) when no loop has been set yet.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Hand the demo() coroutine to the loop and run until it completes.
loop.run_until_complete(demo())
# Start multiple tasks, with no return value
async def demo():
    """Coroutine that prints 'start', waits one second, then prints 'end'."""
    print('start')
    # Suspends this coroutine for one second without blocking the event loop.
    await asyncio.sleep(1)
    print('end')
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated
# (and fails on recent Python versions) when no loop has been set yet.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# asyncio.wait() no longer accepts bare coroutine objects (Python 3.11+),
# so each demo() call is wrapped in a Task first.
wait_obj = asyncio.wait([loop.create_task(demo()) for _ in range(3)])
loop.run_until_complete(wait_obj)
# Start multiple tasks and collect their return values
async def demo():
    """Coroutine that prints 'start', waits one second, then prints 'end'.

    Returns:
        int: Always ``123``.
    """
    print('start')
    # Suspends this coroutine for one second without blocking the event loop.
    await asyncio.sleep(1)
    print('end')
    return 123
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated
# (and fails on recent Python versions) when no loop has been set yet.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
t1 = loop.create_task(demo())
t2 = loop.create_task(demo())
tasks = [t1, t2]
wait_obj = asyncio.wait([t1, t2])
loop.run_until_complete(wait_obj)
# Every task is finished at this point, so result() returns immediately.
for t in tasks:
    print(t.result())
# Collect each result as soon as its task finishes (first finished, first returned)
import asyncio
async def demo(i):
    """Coroutine that waits ``10 - i`` seconds and reports which task it was.

    Args:
        i: Task number; larger values wait for a shorter time.

    Returns:
        tuple: ``(i, 123)``.
    """
    print('start')
    # Suspends this coroutine without blocking the event loop.
    await asyncio.sleep(10-i)
    print('end')
    return i,123
async def main():
    """Start ten ``demo`` tasks and print each result as soon as it is ready."""
    task_l = [asyncio.ensure_future(demo(i)) for i in range(10)]
    # as_completed yields awaitables in completion order, not start order.
    for ret in asyncio.as_completed(task_l):
        # The original awaited an undefined name here instead of ``ret``.
        res = await ret
        print(res)
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated
# (and fails on recent Python versions) when no loop has been set yet.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(main())
import asyncio
async def get_url():
    """Request ``/`` from www.baidu.com:80 over a raw HTTP/1.1 connection.

    Returns:
        str: Every line read from the connection, decoded with the default
        codec and joined with newline characters.
    """
    reader, writer = await asyncio.open_connection('www.baidu.com', 80)
    try:
        writer.write(b'GET / HTTP/1.1\r\nHOST:www.baidu.com\r\nConnection:close\r\n\r\n')
        all_lines = []
        # Iterating the reader yields one line at a time until end of stream.
        async for line in reader:
            data = line.decode()
            all_lines.append(data)
        html = '\n'.join(all_lines)
        return html
    finally:
        # Release the connection even if reading or decoding fails.
        writer.close()
async def main():
    """Run ``get_url`` 20 times concurrently and print each result as it completes."""
    tasks = [asyncio.ensure_future(get_url()) for _ in range(20)]
    # as_completed yields awaitables in completion order, not start order.
    for res in asyncio.as_completed(tasks):
        result = await res
        print(result)
# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    # Create the event loop explicitly: asyncio.get_event_loop() is deprecated
    # (and fails on recent Python versions) when no loop has been set yet.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(main())  # run the main() coroutine to completion
# asyncio: Python's native, low-level coroutine module
# 爬虫 webserver框架
# 提高网络编程的效率和并发效果
# Syntax
# await 阻塞 协程函数这里要切换出去,还能保证一会儿再切回来
# await 必须写在async函数里,async函数是协程函数
# loop 事件循环
# 所有的协程的执行 调度 都离不开这个loop
# Summary of key points
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Operating system
# 1.计算机中所有的资源都是由操作系统分配的
# 2.操作系统调度任务:时间分片、多道机制
# 3.CPU的利用率是我们努力的指标
# Concurrency
# 进程 开销大 数据隔离 资源分配单位 cpython下可以利用多核
# 进程的三状态:就绪 运行 阻塞
# multiprocessing模块
# Process-开启进程
# Lock - 互斥锁
# 为什么要在进程中加锁
# 因为进程操作文件也会发生数据不安全
# Queue -队列 IPC机制(Pipe,redis,memcache,rabbitmq,kafka)
# 生产者消费者模型
# Manager - 提供数据共享机制
# 线程 开销小 数据共享 cpu调度单位 cpython下不能利用多核
# GIL锁
# 全局解释器锁
# Cpython解释器提供的
# 导致了一个进程中多个线程同一时刻只有一个线程能访问CPU -- 多线程不能利用多核
# threading
# Thread类 - 能开启线程start,等待线程结束join
# Lock-互斥锁 不能在一个线程中连续acquire,效率相对高
# Rlock-递归锁 可以在一个线程中连续acquire,效率相对低
# 死锁现象如何发生?如何避免?
# 线程队列 queue模块
# Queue
# LifoQueue
# PriorityQueue
# 池
# concurrent.futures.ThreadPoolExecutor,ProcessPoolExecutor
# 实例化一个池 tp = ThreadPoolExecutor(num),pp = ProcessPoolExecutor(num)
# 提交任务到池中,返回一个对象 obj = tp.submit(func,arg1,arg2...)
# 使用这个对象获取返回值 obj.result()
# 回调函数 obj.add_done_callback(回调函数)
# 阻塞等待池中的任务都结束 tp.shutdown()
# Concepts
# IO操作
# 同步异步
# 阻塞非阻塞
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Lock - keeps data safe between threads
# 互斥锁 :一把锁不能在一个线程中连续acquire,开销小
# 递归锁 :一把锁可以连续在一个线程中acquire多次,acquire多少次就release多少次,开销大
# 死锁现象
# 在某一些线程中出现陷入阻塞并且永远无法结束阻塞的情况就是死锁现象
# 出现死锁:
# 多把锁+交替使用
# 互斥锁在一个线程中连续acquire
# 避免死锁
# 在一个线程中只有一把锁,并且每一次acquire之后都要release
# += -= *= /= ,多个线程对同一个文件进行写操作
# Queue
# 先进先出 Queue
# 后进先出 LifoQueue
# 优先级 PriorityQueue
# Pool
# from concurrent.futures import ThreadPoolExecutor
# 1.是单独开启线程进程还是池?
# 如果只是开启一个子线程做一件事情,就可以单独开线程
# 有大量的任务等待程序去做,要达到一定的并发数,开启线程池
# 根据你程序的io操作也可以判定是用池还是不用池?
# socket的server 大量的阻塞io recv recvfrom socketserver
# 爬虫的时候 池
# 2.回调函数add_done_callback
# 执行完子线程任务之后直接调用对应的回调函数
# 爬取网页 需要等待数据传输和网络上的响应高IO的 -- 子线程
# 分析网页 没有什么IO操作 -- 这个操作没必要在子线程完成,交给回调函数
# 3.ThreadPoolExecutor中的几个常用方法
# tp = ThreadPoolExecutor(cpu*5)
# obj = tp.submit(需要在子线程执行的函数名,参数)
# obj
# 1.获取返回值 obj.result() 是一个阻塞方法
# 2.绑定回调函数 obj.add_done_callback(子线程执行完毕之后要执行的代码对应的函数)
# ret = tp.map(需要在子线程执行的函数名,iterable)
# 1.迭代ret,总是能得到所有的返回值
# shutdown
# tp.shutdown()
# Both processes and threads have locks
# 所有在线程中能工作的基本都不能在进程中工作
# 在进程中能够使用的基本在线程中也可以使用
import time
from concurrent.futures import ThreadPoolExecutor
def son():
    """Print 123, sleep for three seconds, and return 123.

    Returns:
        int: Always ``123``.
    """
    print(123)
    time.sleep(3)
    return 123
def call_back(num):
    """Print the result held by a finished future.

    Args:
        num: A future-like object; its ``result()`` value is printed.
    """
    print(num.result())
# Pool with up to 20 worker threads.
t = ThreadPoolExecutor(max_workers=20)
# submit() returns a future right away; son() runs in a worker thread.
obj = t.submit(son)
print('main : ',obj)
# call_back is invoked with the future once son() has finished.
obj.add_done_callback(call_back)
# Starting a thread inside a child process
import os
from multiprocessing import Process
from threading import Thread
def tfunc():
    """Print the id of the process this function is running in."""
    print(os.getpid())
def pfunc():
    """Print this process's id, then run ``tfunc`` in a new thread."""
    print('pfunc-->', os.getpid())
    Thread(target=tfunc).start()
# Only start the child process when executed as a script, not when imported.
if __name__ == '__main__':
    Process(target=pfunc).start()
# Multi-client chat
# Server side
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import gevent
from gevent import monkey
# Patch the standard library for gevent before the socket module is imported below.
monkey.patch_all()
import socket
def chat(conn):
    """Echo every message received on ``conn`` back in upper case.

    Runs until the peer closes the connection, then closes ``conn``.

    Args:
        conn: A connected socket object.
    """
    try:
        while True:
            data = conn.recv(1024)
            if not data:
                # An empty read means the peer disconnected. Without this
                # check the loop would spin forever on a dead connection.
                break
            msg = data.decode('utf-8')
            conn.send(msg.upper().encode('utf-8'))
    finally:
        conn.close()
# Listening socket for the chat server on localhost:9000.
sk = socket.socket()
sk.bind(('127.0.0.1', 9000))
sk.listen()
# Accept clients forever, serving each connection in its own gevent coroutine.
while True:
    conn, _ = sk.accept()
    gevent.spawn(chat, conn)
# 5*20*500 = 50000
# Client
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
import socket
def client(i):
    """Connect to the chat server and send 'hello' every half second.

    Each reply is printed after ``i`` asterisks so that output from
    different clients can be told apart.

    Args:
        i: Client number; also the number of '*' characters printed.
    """
    # The context manager closes the socket however the loop ends.
    with socket.socket() as sk:
        sk.connect(('127.0.0.1', 9000))
        while True:
            sk.send('hello'.encode('utf-8'))
            reply = sk.recv(1024)
            if not reply:
                # An empty read means the server closed the connection.
                break
            print(i*'*', reply)
            time.sleep(0.5)
from threading import Thread
# Launch 500 concurrent clients, each in its own thread.
for i in range(500):
    Thread(target=client, args=(i,)).start()
# Client 2
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
import socket
# Single client: send 'hello' every half second and print the server's reply.
sk = socket.socket()
try:
    sk.connect(('127.0.0.1', 9000))
    while True:
        sk.send('hello'.encode('utf-8'))
        reply = sk.recv(1024)
        if not reply:
            # An empty read means the server closed the connection.
            break
        print(reply)
        time.sleep(0.5)
finally:
    sk.close()