Python crawler: improving efficiency (thread pools + multitasking asynchronous coroutines)

### 5. Thread pools + multitasking asynchronous coroutines
 
**Thread pool:**

```python
import time
import requests
from multiprocessing.dummy import Pool  # a thread pool with the multiprocessing API

urls = ['http://127.0.0.1:5000/bobo', 'http://127.0.0.1:5000/jay', 'http://127.0.0.1:5000/tom']

# synchronous code: each request blocks until the previous one finishes
start = time.time()
for url in urls:
    page_text = requests.get(url).text
    print(page_text)
print('Total time:', time.time() - start)

# asynchronous code: a pool of 3 threads fetches the urls concurrently
start = time.time()
pool = Pool(3)

def get_request(url):
    return requests.get(url).text

# map dispatches each url to a worker thread and collects the return values
response_list = pool.map(get_request, urls)
print(response_list)

# parsing
def parse(page_text):
    print(len(page_text))

pool.map(parse, response_list)
print('Total time:', time.time() - start)
```
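The snippets above (and below) assume a local test server at 127.0.0.1:5000 whose routes respond slowly. The original post does not show that server, but a minimal Flask stand-in might look like the following sketch; the route names are taken from the URLs above, and the 2-second delay is an assumption inferred from the timings used later:

```python
from flask import Flask
import time

app = Flask(__name__)

@app.route('/bobo')
@app.route('/jay')
@app.route('/tom')
def index():
    time.sleep(2)  # simulate a slow response
    return 'hello'

if __name__ == '__main__':
    app.run()  # serves on 127.0.0.1:5000 by default
```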

**Coroutine objects**

```python
import asyncio
from time import sleep

async def get_request(url):
    print('being requested:', url)
    sleep(2)
    print('request finished:', url)

# calling an async function does not run its body; it returns a coroutine object
c = get_request('www.1.com')
print(c)
```
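Running this prints something like `<coroutine object get_request at 0x...>` plus a RuntimeWarning that the coroutine was never awaited: the function body does not execute until the coroutine object is scheduled on an event loop.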

**Task object**

```python
import asyncio
from time import sleep

# callback function: its single parameter is the task object itself
def callback(task):
    print('i am callback!!!')
    # result() returns the value returned by the task's underlying coroutine
    print(task.result())

async def get_request(url):
    print('being requested:', url)
    sleep(2)
    print('request finished:', url)
    return 'hello bobo'

# create a coroutine object
c = get_request('www.1.com')
# wrap the coroutine into a task object
task = asyncio.ensure_future(c)

# bind the callback to the task; it runs after the coroutine finishes
task.add_done_callback(callback)

# create an event loop object
loop = asyncio.get_event_loop()
# register the task with the event loop and start the loop
loop.run_until_complete(task)
```
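On Python 3.7+ the same flow is usually written with `asyncio.run`, which creates and closes the event loop for you. A minimal equivalent sketch, with the blocking `sleep` swapped for `asyncio.sleep` so it behaves well inside the loop:

```python
import asyncio

async def get_request(url):
    print('being requested:', url)
    await asyncio.sleep(2)
    print('request finished:', url)
    return 'hello bobo'

async def main():
    # create_task schedules the coroutine on the already-running loop
    task = asyncio.create_task(get_request('www.1.com'))
    task.add_done_callback(lambda t: print(t.result()))
    await task

asyncio.run(main())
```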

#### 5.1 Multitasking asynchronous coroutines

```python
import asyncio
import time

start = time.time()
urls = [
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo'
]

# the coroutine body must not call modules that do not support async;
# every blocking operation inside it must be modified with await
async def get_request(url):
    print('being requested:', url)
    await asyncio.sleep(2)
    print('request finished:', url)
    return 'hello bobo'

tasks = []  # holds all of the task objects
for url in urls:
    c = get_request(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))

print(time.time() - start)
```
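`asyncio.gather` is a common alternative to `asyncio.wait`: it accepts the coroutines directly and returns their results in submission order, so the explicit task list can be dropped. A sketch of the same three-request run:

```python
import asyncio
import time

async def get_request(url):
    await asyncio.sleep(2)
    return 'hello bobo'

async def main():
    urls = ['http://localhost:5000/bobo'] * 3
    # gather schedules all coroutines concurrently and preserves result order
    results = await asyncio.gather(*(get_request(url) for url in urls))
    print(results)

start = time.time()
asyncio.run(main())
print(time.time() - start)  # roughly 2 seconds, not 6
```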

**Multitasking asynchronous coroutines applied to crawlers**

```python
import asyncio
import requests
import time

start = time.time()
urls = [
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo'
]

# this can NOT achieve an asynchronous effect: requests does not support
# async, so each requests.get() call blocks the entire event loop
async def req(url):
    page_text = requests.get(url).text
    return page_text

tasks = []
for url in urls:
    c = req(url)
    task = asyncio.ensure_future(c)
    tasks.append(task)

loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))

print(time.time() - start)
```
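If you must keep `requests`, one standard workaround (not used in the original post) is `loop.run_in_executor`, which pushes each blocking call onto a worker thread so the event loop stays free; a hedged sketch:

```python
import asyncio
import requests
import time

urls = ['http://localhost:5000/bobo'] * 3

async def req(url):
    loop = asyncio.get_event_loop()
    # run the blocking requests call in the default thread pool executor
    return await loop.run_in_executor(None, lambda: requests.get(url).text)

start = time.time()
loop = asyncio.get_event_loop()
results = loop.run_until_complete(asyncio.gather(*(req(u) for u in urls)))
print(len(results), 'responses in', time.time() - start)
```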

#### 5.2 aiohttp (requests does not support async)

```python
import asyncio
import time
import aiohttp
from lxml import etree
urls = [
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
    'http://localhost:5000/bobo',
]
# aiohttp is an async-capable HTTP client (requests does not support async)
async def req(url):
    async with aiohttp.ClientSession() as s:
        async with await s.get(url) as response:
            # response.read() returns bytes; response.text() returns str
            page_text = await response.text()
            return page_text

    # details: put async in front of every "with", and await in front of every blocking step

def parse(task):
    page_text = task.result()
    tree = etree.HTML(page_text)
    name = tree.xpath('//p/text()')[0]
    print(name)
if __name__ == '__main__':
    start = time.time()
    tasks = []
    for url in urls:
        c = req(url)
        task = asyncio.ensure_future(c)
        task.add_done_callback(parse)
        tasks.append(task)

    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.wait(tasks))

    print(time.time()-start)
```
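A real crawler usually also caps how many requests are in flight at once so it does not overwhelm the target site; `asyncio.Semaphore` is the usual tool. A sketch on top of the aiohttp version above (the limit of 2 is an arbitrary choice):

```python
import asyncio
import aiohttp

urls = ['http://localhost:5000/bobo'] * 6
sem = asyncio.Semaphore(2)  # at most 2 requests in flight at any moment

async def req(url):
    async with sem:  # wait for a free slot before opening the connection
        async with aiohttp.ClientSession() as s:
            async with s.get(url) as response:
                return await response.text()

loop = asyncio.get_event_loop()
results = loop.run_until_complete(asyncio.gather(*(req(u) for u in urls)))
print(len(results))
```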

 

Origin: www.cnblogs.com/doraemon548542/p/11972550.html