28 Apr 18 - Asynchronous + callback mechanism, thread queues, thread events, coroutines (yield, greenlet, gevent)

1. Asynchronous + callback mechanism
a. Problem introduction
Two problems with the submit-then-collect pattern shown below:
1) A task's return value cannot be processed as soon as it is ready; all results wait until every task has finished and are then processed in one batch.
2) The parsing runs serially: if one parse takes 2s, parsing 9 results takes 18s.
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
import os
import requests
import time
import random
 
def get(url):
    print('%s GET %s' %(os.getpid(),url))
    response=requests.get(url)
    time.sleep(random.randint(1,3))
    if response.status_code == 200:
        return response.text
 
def parse(res):
    print('%s parsing result: %s' %(os.getpid(),len(res)))
 
if __name__ == '__main__':
    urls=[
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.python.org',
    ]
    pool=ProcessPoolExecutor(4)
    objs = []
    for url in urls:
        obj=pool.submit(get,url)
        objs.append(obj)
 
    pool.shutdown(wait=True) # block until all submitted tasks have finished
 
    for obj in objs:
        res=obj.result()
        parse(res)
 
b. An improved attempt: this solves both problems above, but the fetching function get and the parsing function parse are now coupled together
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
import requests
import os
import time
import random
 
def get(url):
    print('%s GET %s' %(os.getpid(),url))
    response=requests.get(url)
    time.sleep(random.randint(1,3))
 
    if response.status_code == 200:
        parse(response.text)
 
def parse(res):
    print('%s parsing result: %s' %(os.getpid(),len(res)))
 
if __name__ == '__main__':
    urls=[
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.python.org',
 
    ]
    pool=ProcessPoolExecutor(4)
    for url in urls:
        pool.submit(get,url)
 
c1. Final solution (process version): solves both problems above and at the same time decouples the fetching function get from the parsing function parse
The main process acts as the executor of the callback
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
import requests
import os
import time
import random
 
def get(url):
    print('%s GET %s' %(os.getpid(),url))
    response=requests.get(url)
    time.sleep(random.randint(1,3))
 
    if response.status_code == 200:
        # Do parsing work
        return response.text
 
def parse(obj): # the finished future passes itself to the callback, so parse must take exactly one parameter
    res=obj.result()
    print('%s parsing result: %s' %(os.getpid(),len(res)))
 
if __name__ == '__main__':
    urls=[
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.python.org',
    ]
 
    pool=ProcessPoolExecutor(4)
    for url in urls:
        obj=pool.submit(get,url)
        obj.add_done_callback(parse)
 
    print('Main process', os.getpid())
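Running this, the pid printed by parse matches the pid printed as 'Main process': with ProcessPoolExecutor, add_done_callback fires in the parent process once the worker's result arrives, so the pool workers stay free for fetching.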
 
c2. Final solution (thread version): solves both problems above and at the same time decouples the fetching function get from the parsing function parse
The callback is executed by whichever pool thread finishes the task
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
from threading import current_thread
import requests
import time
import random
 
def get(url):
    print('%s GET %s' %(current_thread().name,url))
    response=requests.get(url)
    time.sleep(random.randint(1,3))
 
    if response.status_code == 200:
        # Do parsing work
        return response.text
 
def parse(obj):
    res=obj.result()
    print('%s parsing result: %s' %(current_thread().name,len(res)))
 
if __name__ == '__main__':
    urls=[
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.baidu.com',
        'https://www.python.org',
    ]
    pool=ThreadPoolExecutor(4)
    for url in urls:
        obj=pool.submit(get,url)
        obj.add_done_callback(parse)
 
    print('Main thread', current_thread().name)
 
2. Thread queue
import queue
 
q=queue.Queue(3) #Queue: first in first out
q.put(1)
q.put(2)
q.put(3)
# q.put(4) # would block: the queue is already full
print(q.get())
print(q.get())
print(q.get())
 
q=queue.LifoQueue(3) #Stack: last in first out
q.put('a')
q.put('b')
q.put('c')
print(q.get())
print(q.get())
print(q.get())
 
q=queue.PriorityQueue(3) # Priority queue: store items as (priority, data) tuples; the smaller the first element, the higher the priority
q.put((10,'user1'))
q.put((-3,'user2'))
q.put((-2,'user3'))
print(q.get())
print(q.get())
print(q.get())
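
What makes queue.Queue a thread queue is that put/get are thread-safe, so threads can exchange data through it without extra locking. A minimal producer/consumer sketch (the function names and the None sentinel are illustrative, not part of the notes above):

from threading import Thread
import queue

q=queue.Queue()

def producer():
    for i in range(3):
        q.put(i) # blocks only if the queue is full
    q.put(None) # sentinel telling the consumer to stop

def consumer():
    while True:
        item=q.get() # blocks until an item is available
        if item is None:
            break
        print('consumed',item)

t1=Thread(target=producer)
t2=Thread(target=consumer)
t1.start()
t2.start()
t1.join()
t2.join()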
 
3. Thread event
a. Case 1: once check sets the event flag, connect resumes running from event.wait()
from threading import Event,current_thread,Thread
import time
 
event=Event() # an Event maintains an internal flag, initially False
 
def check():
    print('%s is checking whether the service is normal....' %current_thread().name)
    time.sleep(3)
    event.set() # set the internal flag to True
 
def connect():
    print('%s waiting for connection...' %current_thread().name)
    event.wait() # block until the flag is set; event.wait(1) would wait at most 1 second
    print('%s start connecting...' % current_thread().name)
 
if __name__ == '__main__':
    t1=Thread(target=connect)
    t2=Thread(target=connect)
    t3=Thread(target=connect)
    c1=Thread(target=check)
    t1.start()
    t2.start()
    t3.start()
    c1.start()
 
b. Case 2: give up after three connection attempts
from threading import Event,current_thread,Thread
import time
 
event=Event()
 
def check():
    print('%s is checking whether the service is normal....' %current_thread().name)
    time.sleep(5)
    event.set()
 
def connect():
    count=1
    while not event.is_set():
        if count == 4:
            print('Too many attempts, please try again later')
            return
        print('%s trying to connect (attempt %s)...' %(current_thread().name,count))
        event.wait(1) # wait up to 1 second for the flag before retrying
        count+=1
    print('%s start connecting...' % current_thread().name)
 
if __name__ == '__main__':
    t1=Thread(target=connect)
    t2=Thread(target=connect)
    t3=Thread(target=connect)
    c1=Thread(target=check)
    t1.start()
    t2.start()
    t3.start()
    c1.start()
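
Note: event.wait(1) returns True if the flag was set within the timeout and False on timeout, so the loop above could equally test that return value instead of calling event.is_set() again.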
 
4. Coroutines
1. Concurrency within a single thread: coroutines (the goal is efficiency, though not every coroutine actually improves it)
   Concurrency means multiple tasks appear to run at the same time; the essence of concurrency is switching + saving state.
   An effective coroutine 'deceives' the CPU to a certain extent: through its own internal coordination it switches to another of its own tasks as soon as one hits IO, so the thread always looks busy; the OS therefore keeps it in the ready or running state and grants it more CPU time.
2. Three ways to achieve concurrency:
a) Concurrency within a single thread: controlled by the program itself, relatively fast
b) Concurrency across multiple threads: controlled by the operating system, relatively slow
c) Concurrency across multiple processes: controlled by the operating system, relatively slow

3. Because yield saves state, two tasks can switch back and forth directly, which already gives the effect of concurrency (but yield does not switch automatically when it meets blocking IO)
   PS: add a print to each task and the output of the two tasks clearly interleaves, i.e. they run concurrently.
 
import time
def consumer():
    '''Task 1: Receive data, process data'''
    while True:
        x=yield
 
def producer():
    '''Task 2: Production data'''
    g=consumer()
    next(g)
    for i in range(10000000):
        g.send(i)
 
start=time.time()
producer() # 1.0202116966247559 seconds on the original run
stop=time.time()
print(stop-start)
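
For comparison, a sketch of the same producer/consumer pair written as plain function calls, with no generator switching (timings vary by machine; the point is only to see what the yield switches cost):

import time

def consumer_direct(x):
    '''Task 1: receive data, process data'''
    pass

def producer_direct():
    '''Task 2: produce data'''
    for i in range(10000000):
        consumer_direct(i)

start=time.time()
producer_direct() # compare this timing with the generator version above
stop=time.time()
print(stop-start)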
 
# Switching between two pure-computation tasks with yield
import time
def task1():
    res=1
    for i in range(1000000):
        res+=i
        yield
        # time.sleep(10000) # uncomment to verify that yield does not switch away on blocking calls
        print('task1')
 
def task2():
    g=task1()
    res=1
    for i in range(1000000):
        res*=i
        next(g)
        print('task2')
 
start=time.time()
task2()
stop=time.time()
print(stop-start)
 
5. Implementing IO switching under a single thread
1. Using greenlet (an encapsulation of yield-style switching; it does not switch automatically when it meets IO)
from greenlet import greenlet
import time
 
def eat(name):
    print('%s eat 1' %name)
    time.sleep(30) # greenlet does not switch away during this blocking call
    g2.switch('alex') # the argument is passed only on the first switch into a greenlet
    print('%s eat 2' %name)
    g2.switch()
def play(name):
    print('%s play 1' %name)
    g1.switch()
    print('%s play 2' %name)
 
g1=greenlet(eat)
g2=greenlet(play)
g1.switch('egon')
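
Expected output order, as a sanity check: 'egon eat 1', then a 30-second pause (the blocking sleep is not a switch point), then 'alex play 1', 'egon eat 2', 'alex play 2'.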
 
2. Using the gevent module (an encapsulation of greenlet; without extra handling it only switches automatically on gevent's own IO)
import gevent
 
def eat(name):
    print('%s eat 1' %name)
    gevent.sleep(5) # if this were time.sleep(5), gevent would not switch automatically
    print('%s eat 2' %name)
def play(name):
    print('%s play 1' %name)
    gevent.sleep(3)
    print('%s play 2' %name)
 
g1=gevent.spawn(eat,'egon')
g2=gevent.spawn(play,'alex')
 
# gevent.sleep(100)
# g1.join()
# g2.join()
gevent.joinall([g1,g2])
 
3. Using the gevent module with monkey patching (after monkey.patch_all(), gevent also actively switches on other blocking IO, such as time.sleep)
from gevent import monkey;monkey.patch_all()
from threading import current_thread
import gevent
import time
 
def eat():
    print('%s eat 1' %current_thread().name)
    time.sleep(5)
    print('%s eat 2' %current_thread().name)
def play():
    print('%s play 1' %current_thread().name)
    time.sleep(3)
    print('%s play 2' %current_thread().name)
 
g1=gevent.spawn(eat)
g2=gevent.spawn(play)
 
# gevent.sleep(100)
# g1.join()
# g2.join()
print(current_thread().name)
gevent.joinall([g1,g2])
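
Tying this back to section 1: once monkey.patch_all() has been applied, the blocking socket IO inside requests also becomes a switch point, so a single thread can fetch several URLs concurrently. A minimal sketch under that assumption (gevent and requests installed; the URL list mirrors the one used earlier):

from gevent import monkey;monkey.patch_all() # patch before importing requests
import gevent
import requests

def get(url):
    print('GET %s' %url)
    response=requests.get(url)
    if response.status_code == 200:
        print('%s length: %s' %(url,len(response.text)))

urls=['https://www.baidu.com','https://www.python.org']
gevent.joinall([gevent.spawn(get,url) for url in urls])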
