Python笔记(爬虫、Scrapy、WebSocket)

一、需求:我给你10个图片的URL,你帮我把这10张图片下载下来。(下面的示例代码用5个网页地址代替图片地址,思路相同。)

  1. 方案一:多线程

    import requests
    import threading
    
    
    urls = [
        'http://www.baidu.com/',
        'https://www.cnblogs.com/',
        'https://www.cnblogs.com/news/',
        'https://cn.bing.com/',
        'https://stackoverflow.com/',
    ]

    def task(url):
        """Download *url* and print the response object (or the failure).

        :param url: address to fetch with an HTTP GET
        :return: None
        """
        try:
            # Without a timeout a stalled server would block this thread forever.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            # Report the failure instead of dumping a raw traceback from the thread.
            print(url, 'failed:', exc)
            return
        print(response)

    # One thread per URL, so the blocking downloads overlap.
    threads = []
    for url in urls:
        t = threading.Thread(target=task,args=(url,))
        t.start()
        threads.append(t)

    # Wait explicitly, so anything added below runs only after all downloads end.
    for t in threads:
        t.join()
    
  2. 方案二:协程

    """
    协程 + IO切换
    安装: pip3 install gevent
    gevent内部调用greenlet(greenlet实现了协程)。
    """
    from gevent import monkey; monkey.patch_all()
    import gevent
    import requests
    
    
    def func(url):
        """Download *url* inside a greenlet and print the response (or the failure).

        :param url: address to fetch with an HTTP GET
        :return: None
        """
        try:
            # Without a timeout a stalled server would keep this greenlet alive forever.
            response = requests.get(url, timeout=10)
        except requests.RequestException as exc:
            print(url, 'failed:', exc)
            return
        print(response)

    urls = [
        'http://www.baidu.com/',
        'https://www.cnblogs.com/',
        'https://www.cnblogs.com/news/',
        'https://cn.bing.com/',
        'https://stackoverflow.com/',
    ]
    # One greenlet per URL; the monkey-patched sockets yield while waiting on I/O.
    spawn_list = [gevent.spawn(func, url) for url in urls]

    # Block until every greenlet has finished.
    gevent.joinall(spawn_list)
    
  3. 方案三:基于事件循环的异步非阻塞模块

    """
    基于事件循环的异步非阻塞模块:Twisted
    """
    from twisted.web.client import getPage, defer
    from twisted.internet import reactor
    
    def stop_loop(arg):
        """Stop the reactor; used as the final callback once all downloads are done.

        :param arg: result passed along by the DeferredList (ignored)
        """
        reactor.stop()


    def get_response(contents):
        """Print the body of a successfully downloaded page.

        :param contents: page body delivered by getPage
        """
        print(contents)


    def report_failure(failure):
        """Print why a download failed so the error is not left unhandled.

        :param failure: twisted Failure describing the error
        """
        print('download failed:', failure.getErrorMessage())


    deferred_list = []

    url_list = [
        'http://www.baidu.com/',
        'https://www.cnblogs.com/',
        'https://www.cnblogs.com/news/',
        'https://cn.bing.com/',
        'https://stackoverflow.com/',
    ]

    # NOTE(review): getPage is deprecated in newer Twisted releases; consider
    # twisted.web.client.Agent (or treq) when upgrading.
    for url in url_list:
        deferred = getPage(bytes(url, encoding='utf8'))
        # Success goes to get_response, failure to report_failure, so a failed
        # request no longer ends up as an "Unhandled error in Deferred".
        deferred.addCallbacks(get_response, report_failure)
        deferred_list.append(deferred)


    # Fires once every individual deferred has produced a result.
    dlist = defer.DeferredList(deferred_list)
    dlist.addBoth(stop_loop)

    reactor.run()
    
  4. 自定义基于事件循环的异步非阻塞模块

    chun.py

    import socket
    import select
    
    class ChunSheng(object):
        """Tiny select()-based event loop that requests ``/`` from several hosts.

        Every request is a non-blocking TCP socket to port 80.  ``run`` sends the
        HTTP request as soon as a socket becomes writable and passes the first
        chunk of data received on it to the callback registered for that request.
        """

        def __init__(self):
            # Sockets that are still waiting for a response.
            self.socket_list = []
            # Sockets whose HTTP request has not been sent yet.
            self.conn_list = []

            # socket -> callback invoked with the received bytes.
            self.conn_func_dict = {}
            # socket -> host name, needed to build a correct Host header.
            self.conn_host_dict = {}

        def add_request(self,url_func):
            """
            Register one download.

            :param url_func: sequence whose first item is the host name and whose
                second item is the callback called with the received bytes
            :return:
            """
            host = url_func[0]
            conn = socket.socket()
            conn.setblocking(False)
            try:
                conn.connect((host,80))
            except BlockingIOError:
                # Expected: a non-blocking connect returns before the handshake
                # finishes; select() in run() reports when it is done.
                pass
            self.conn_func_dict[conn] = url_func[1]
            self.conn_host_dict[conn] = host

            self.socket_list.append(conn)
            self.conn_list.append(conn)

        def run(self):
            """
            Drive all registered sockets until each one has delivered a response.

            Only the first chunk (up to 8096 bytes) of each response is passed to
            the callback; the socket is closed right after.
            :return:
            """
            # Looping on the list itself avoids calling select() with nothing to watch.
            while self.socket_list:
                #   select.select
                #   1st argument: sockets checked for incoming response data
                #   2nd argument: sockets checked for a completed connection
                #
                #   1st return value r: sockets that have data to read
                #   2nd return value w: sockets that are connected (writable)
                r,w,e = select.select(self.socket_list,self.conn_list,[],0.05)
                for sock in w:
                    host = self.conn_host_dict[sock]
                    # Well-formed request line plus the real host name.
                    request = (b'GET / HTTP/1.1\r\nHost: ' + host.encode('idna') +
                               b'\r\nConnection: close\r\n\r\n')
                    sock.send(request)
                    self.conn_list.remove(sock)

                for sock in r:
                    data = sock.recv(8096)
                    # Drop the bookkeeping so closed sockets are not kept alive.
                    func = self.conn_func_dict.pop(sock)
                    self.conn_host_dict.pop(sock, None)
                    func(data)
                    sock.close()
                    self.socket_list.remove(sock)
                    # A closed socket must not be handed to select() again.
                    if sock in self.conn_list:
                        self.conn_list.remove(sock)
    

    views.py

    from chun import ChunSheng
    
    def callback1(data):
        """Print the bytes received for a finished download."""
        print('下载完成',data)

    def callback2(data):
        """Print the bytes received for a finished download."""
        print('下载完成',data)

    chun = ChunSheng()

    # (host, callback) pairs: the first two hosts report through callback1,
    # the remaining three through callback2.
    urls = [(host, callback1) for host in ('www.baidu.com', 'www.cnblogs.com')]
    urls += [
        (host, callback2)
        for host in ('www.pythonav.com', 'www.bing.com', 'www.stackoverflow.com')
    ]

    # Register every request, then run the loop until all have answered.
    for url in urls:
        chun.add_request(url)

    chun.run()
    

二、websocket

  1. websocket本质

    app.py

    from flask import Flask,render_template,request
    from geventwebsocket.handler import WebSocketHandler
    from gevent.pywsgi import WSGIServer
    import json
    
    app = Flask(__name__)

    # Vote tallies keyed by candidate id; each entry holds the display name
    # and the current vote count.
    USERS = {
        '1': {'name': '钢弹', 'count': 0},
        '2': {'name': '铁锤', 'count': 0},
        '3': {'name': '贝贝', 'count': 100},
    }


    # http://127.0.0.1:5000/index
    @app.route('/index')
    def index():
        """Render the voting page with the current tallies."""
        context = {'users': USERS}
        return render_template('index.html', **context)
    
    # http://127.0.0.1:5000/message
    # Every currently connected WebSocket; a vote is broadcast to all of them.
    WEBSOCKET_LIST = []
    @app.route('/message')
    def message():
        """
        WebSocket endpoint: receive candidate ids and broadcast the new tally.

        Each received message is treated as a candidate id from USERS.  The
        candidate's count is incremented and ``{"cid": ..., "count": ...}`` is
        sent as JSON to every connected client.  Messages that are not a known
        id are ignored.

        :return: an explanatory string for plain HTTP requests, otherwise an
            empty body once the WebSocket has been closed
        """
        ws = request.environ.get('wsgi.websocket')
        if not ws:
            print('http')
            return '您使用的是Http协议'
        WEBSOCKET_LIST.append(ws)
        try:
            while True:
                cid = ws.receive()
                if not cid:
                    # Falsy message: the loop treats this as "client is gone".
                    break
                if not isinstance(cid, str) or cid not in USERS:
                    # Unknown id or binary frame: ignore it rather than crash.
                    continue
                user = USERS[cid]
                user['count'] += 1
                payload = json.dumps({'cid':cid,'count':user['count']})
                # Iterate over a snapshot: other greenlets may add or remove
                # sockets while this one is blocked in send().
                for client in list(WEBSOCKET_LIST):
                    client.send(payload)
        finally:
            # Always unregister, even on errors, so dead sockets are not kept.
            WEBSOCKET_LIST.remove(ws)
            ws.close()
        # A Flask view must return a response; None would raise an error.
        return ''
    
    
    
    if __name__ == '__main__':
        # Serve the Flask app through gevent's WSGI server, using the handler
        # class that understands WebSocket upgrade requests.
        listen_on = ('0.0.0.0', 5000)
        http_server = WSGIServer(listen_on, app, handler_class=WebSocketHandler)
        http_server.serve_forever()
    

    index.html

    <!DOCTYPE html>
    <html lang="zh-CN">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
        <meta name="viewport" content="width=device-width, initial-scale=1">
    </head>
    <body>
        <h1>丑男投票系统</h1>
        <ul>
            {% for k,v in users.items() %}
                <li onclick="vote({{k}})" id="id_{{k}}">{{v.name}}<span>{{v.count}}</span></li>
            {% endfor %}
        </ul>

        <script src="{{ url_for('static',filename='jquery-3.3.1.min.js')}}"></script>
        <script>
            // Connect back to whichever host served this page instead of a
            // hard-coded LAN address, and use wss:// when the page is on https.
            var scheme = location.protocol === 'https:' ? 'wss://' : 'ws://';
            var ws = new WebSocket(scheme + location.host + '/message');
            ws.onmessage = function (event) {
                /* Runs automatically whenever the server pushes data to the client. */
                // Payload shape: {'cid': cid, 'count': new}
                var response = JSON.parse(event.data);
                $('#id_'+response.cid).find('span').text(response.count);

            };

            function vote(cid) {
                // The server looks candidates up by string id.
                ws.send(String(cid));
            }
        </script>
    </body>
    </html>
    
  2. 手动通过socket实现websocket服务端(完成握手并向客户端推送消息)

    server.py

    def get_headers(data):
        """
        Parse the raw bytes of an HTTP request head into a dictionary.

        The request line contributes the keys ``method``, ``url`` and
        ``protocol`` (only when it has exactly three space-separated parts).
        Every following ``Name: value`` line is stored under its name exactly as
        written, with surrounding whitespace stripped from the value.  Lines
        without a colon are skipped, and everything after the first blank line
        (the body) is ignored.  A missing blank line is tolerated.

        :param data: raw request bytes, decoded as UTF-8
        :return: dict holding the request-line parts and the header fields
        """
        header_dict = {}
        text = str(data, encoding='utf-8')
        # partition() never raises, unlike unpacking the result of split().
        header, _, _ = text.partition('\r\n\r\n')
        request_line, *header_lines = header.split('\r\n')

        request_parts = request_line.split(' ')
        if len(request_parts) == 3:
            header_dict['method'], header_dict['url'], header_dict['protocol'] = request_parts

        for line in header_lines:
            name, colon, value = line.partition(':')
            if not colon:
                # Malformed header line: skip it instead of aborting the parse.
                continue
            header_dict[name] = value.strip()
        return header_dict
    
    
    def send_msg(conn, msg_bytes):
        """
        Send one unmasked WebSocket text frame from the server to a client.

        The frame is a first byte of 0x81, a length field whose size depends on
        the payload length (1 byte below 126, marker 126 + 2 bytes up to 0xFFFF,
        marker 127 + 8 bytes beyond that), then the payload itself.

        :param conn: client connection socket, i.e. conn, address = socket.accept()
        :param msg_bytes: payload bytes to deliver to the client
        :return:
        """
        import struct

        token = b"\x81"  # FIN bit set, opcode 0x1 (text frame) per RFC 6455
        length = len(msg_bytes)
        if length < 126:
            token += struct.pack("B", length)
        elif length <= 0xFFFF:
            token += struct.pack("!BH", 126, length)
        else:
            token += struct.pack("!BQ", 127, length)

        # sendall() keeps writing until the whole frame is out; a bare send()
        # may transmit only part of it and leave the client with a broken frame.
        conn.sendall(token + msg_bytes)
    
    
    import base64
    import hashlib
    import socket
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(('127.0.0.1', 8002))
    sock.listen(5)
    try:
        # 1. Wait for a single client to connect.
        conn, address = sock.accept()
        try:
            # 2. Read the client's HTTP upgrade (handshake) request.
            msg = conn.recv(8096)
            msg_dict = get_headers(msg)

            # 3. Derive the accept token: SHA-1 of the client's key followed by
            #    the fixed WebSocket GUID, then base64 (a hash, not encryption).
            value = msg_dict['Sec-WebSocket-Key'] + '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'
            ac = base64.b64encode(hashlib.sha1(value.encode('utf-8')).digest())
            # print(ac,type(ac))
            # 4. Answer the handshake with the computed token.
            response_tpl = "HTTP/1.1 101 Switching Protocols\r\n" \
                  "Upgrade:websocket\r\n" \
                  "Connection: Upgrade\r\n" \
                  "Sec-WebSocket-Accept: %s\r\n" \
                  "WebSocket-Location: ws://127.0.0.1:8002\r\n\r\n"
            response = response_tpl %(ac.decode('utf-8'),)
            conn.sendall(response.encode('utf-8'))

            # Reference (disabled): how a masked frame sent by the client
            # would be received and decoded.
            #
            #     info = conn.recv(8096)
            #
            #     payload_len = info[1] & 127
            #     if payload_len == 127:
            #         extend_payload_len = info[2:10]
            #         mask = info[10:14]
            #         decoded = info[14:]
            #     elif payload_len == 126:
            #         extend_payload_len = info[2:4]
            #         mask = info[4:8]
            #         decoded = info[8:]
            #     else:
            #         extend_payload_len = None
            #         mask = info[2:6]
            #         decoded = info[6:]
            #
            #     bytes_list = bytearray()
            #     for i in range(len(decoded)):
            #         chunk = decoded[i] ^ mask[i % 4]
            #         bytes_list.append(chunk)
            #     body = str(bytes_list, encoding='utf-8')
            #     print(body)

            # 5. Push a message to the client every two seconds until it leaves.
            import time
            while True:
                time.sleep(2)
                try:
                    send_msg(conn,b'123123123')
                except OSError:
                    # The client disconnected; stop instead of crashing.
                    break
        finally:
            conn.close()
    finally:
        # Release the listening port even when the handshake or the loop fails.
        sock.close()
    

    client.html

    <!DOCTYPE html>
    <html lang="zh-CN">
    <head>
        <meta charset="UTF-8">
        <title>Title</title>
        <meta name="viewport" content="width=device-width, initial-scale=1">
    </head>
    <body>
        <script>
            var ws = new WebSocket("ws://127.0.0.1:8002/xxxx")

            // Runs automatically whenever the server pushes data to the client.
            ws.addEventListener('message', function (event) {
                console.log(event.data);
            });
        </script>
    </body>
    </html>
    

猜你喜欢

转载自blog.csdn.net/qq_41433183/article/details/90229501