Python crawler: downloading pictures with a raw HTTP request sent through the socket module

import socket
import re

# Image resources to download. The request is sent over a plain TCP socket on
# port 80 (no TLS), so each address must use http:// rather than https://.
urls = [
    'http://pic.netbian.com/uploads/allimg/220211/004115-1644511275bc26.jpg',
    'http://pic.netbian.com/uploads/allimg/220215/233510-16449393101c46.jpg',
    'http://pic.netbian.com/uploads/allimg/211120/005250-1637340770807b.jpg',
]
for url in urls:
    # The context manager closes the socket even when connecting, sending or
    # receiving raises, so no descriptor is leaked per URL.
    with socket.socket() as client:
        # Open the connection to the image host.
        client.connect(('pic.netbian.com', 80))
        # Build the HTTP request. HTTP/1.0 is used so the server closes the
        # connection after the response, which is what ends the recv loop.
        http_res = (
            'GET ' + url + ' HTTP/1.0\r\n'
            'Host:pic.netbian.com\r\n'
            'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36\r\n'
            '\r\n'
        )
        # sendall keeps writing until the whole request is transmitted;
        # plain send may stop after a partial write.
        client.sendall(http_res.encode())
        # Collect the response chunks and join them once at the end instead
        # of repeatedly concatenating bytes objects.
        chunks = []
        while True:
            data = client.recv(1024)
            if not data:
                # An empty read means the peer closed the connection.
                break
            chunks.append(data)
    result = b''.join(chunks)
    print(result)
    # Split the response at the first blank line: everything before it is the
    # status line and headers, everything after it is the image payload.
    header, separator, body = result.partition(b'\r\n\r\n')
    if not separator:
        print('Skipping ' + url + ': response has no header/body separator')
        continue
    # Only save the payload of a successful response; a redirect or error
    # page must not be written to disk under an image file name.
    status_parts = header.split(b'\r\n', 1)[0].split()
    if len(status_parts) < 2 or status_parts[1] != b'200':
        print('Skipping ' + url + ': unexpected status line ' + repr(header.split(b'\r\n', 1)[0]))
        continue
    # Save the payload locally under the last path component of the URL.
    with open(url.split('/')[-1], 'wb') as f:
        f.write(body)

Guess you like

Origin blog.csdn.net/weixin_45387160/article/details/127482433