简单爬虫 爬取百度图片并批量重命名

一:爬取百度图片

注意:如果运行时提示 requests 库不存在,在命令行执行 `pip install requests` 安装即可。

import json
import requests
import threading


def download_img(image_url, word, index):
    """Download one image and save it as ``images/<word>/<word>_<index>.jpg``.

    Args:
        image_url: URL of the image to fetch.
        word: Search keyword; used both as the sub-directory name under
            ``images/`` and as the file-name prefix.
        index: Number appended to the file name.

    Returns:
        None. Any failure (network error, HTTP error status, file-system
        error) is printed to stdout and swallowed, so one bad image does not
        stop the rest of a crawl.
    """
    img_name = word + "_" + str(index)
    try:
        # NOTE(review): the Referer header is presumably required by the image
        # host's hot-link protection -- confirm before removing it.
        response_img = requests.get(
            image_url,
            headers={"Referer": "http://image.baidu.com"},
            timeout=10,  # without a timeout a stalled connection blocks forever
        )
        # Fail before the file is opened so that an HTTP error page is never
        # saved to disk under a .jpg name.
        response_img.raise_for_status()
        with open("images/" + word + "/" + str(img_name) + ".jpg", 'wb') as f:
            f.write(response_img.content)
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        print("download_img")
        print(e)


def get_page(word, pn, rn):
    """Fetch one page of image-search results and download each image.

    Requests the Baidu ``acjson`` endpoint for ``word``, then starts one
    thread per result that carries a ``middleURL`` key; each thread runs
    ``download_img``. Saved files are numbered from ``pn + 1`` upward.

    Args:
        word: Search keyword. It is percent-encoded before being placed in the
            query string, so characters such as ``&``, ``#`` or spaces cannot
            break the URL. The unencoded keyword is still what gets passed on
            for the file names.
        pn: Offset of the first result to request (sent as the ``pn`` query
            parameter).
        rn: Number of results to request (sent as the ``rn`` query parameter).

    Returns:
        None. Any failure (network error, HTTP error status, malformed JSON,
        missing ``data`` key) is printed to stdout and swallowed.
    """
    from urllib.parse import quote

    try:
        encoded_word = quote(word)
        url = (
            "http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord="
            + encoded_word
            + "&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&hd=&latest=&copyright=&word="
            + encoded_word
            + "&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=1&fr=&expermode=&force=&cg=star&pn="
            + str(pn)
            + "&rn="
            + str(rn)
            + "&gsm=&1568680507018="
        )
        index = pn + 1
        # A timeout keeps a stalled connection from hanging the whole crawl.
        response = requests.get(url, timeout=10)
        # Surface HTTP errors directly instead of as a confusing JSON error.
        response.raise_for_status()
        obj = json.loads(response.text)
        items = obj['data']
        for item in items:
            # Entries without a 'middleURL' key are skipped and do not consume
            # an index.
            if 'middleURL' in item:
                url_img = item['middleURL']
                threading.Thread(target=download_img, args=(url_img, word, index)).start()
                index += 1
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on.
        print("get_page")
        print(e)


def get_n_page(page_num, word, pn, rn):
    """Fetch ``page_num`` consecutive result pages for ``word``.

    Calls ``get_page`` once per page, starting at offset ``pn`` and advancing
    the offset by ``rn`` after every call.

    Args:
        page_num: How many pages to fetch.
        word: Search keyword, passed through to ``get_page``.
        pn: Offset used for the first page.
        rn: Page size; also the step by which the offset grows.
    """
    offset = pn
    for _page in range(page_num):
        get_page(word, offset, rn)
        offset = offset + rn


def main():
    """Create the output directory for the keyword and crawl its result pages.

    The search settings are hard-coded below; edit them before running.
    """
    import os

    # --- Settings to edit ---
    keyword = "苍老师"  # search keyword
    start_offset = 0  # passed to get_n_page as pn (starting position)
    per_page = 20  # passed as rn (results per page)
    pages = 3  # number of pages to fetch

    # Images are written to images/<keyword>/, so the directory must exist
    # before any download starts.
    os.makedirs("images/" + keyword, exist_ok=True)

    # To fetch only a single page, call
    # get_page(keyword, start_offset, per_page) instead.
    get_n_page(pages, keyword, start_offset, per_page)


# Start the crawl only when this file is executed as a script, so importing it
# does not trigger network downloads as a side effect.
if __name__ == '__main__':
    main()

二:图片批量重命名

import os


def rename(dir_path, name):
    """Rename every regular file in ``dir_path`` to ``<name>.<index>.jpg``.

    Files are processed in sorted name order and numbered from 0, so the
    result is deterministic. Subdirectories are left untouched.

    The work is done in two passes. Every file is first moved to a unique
    temporary name and only then to its final name. A single pass could move
    a file onto a not-yet-processed file that already carries a target name
    (for example an existing ``<name>.0.jpg``), which silently overwrites it
    on POSIX and raises on Windows.

    Args:
        dir_path: Directory whose files are renamed.
        name: Prefix for the new file names.

    Raises:
        OSError: If the directory cannot be listed or a rename fails. A
            failure part-way through can leave some files under their
            temporary ``.rename_tmp_*`` names.
    """
    import uuid

    file_names = sorted(
        entry for entry in os.listdir(dir_path)
        if os.path.isfile(os.path.join(dir_path, entry))
    )

    # A random token keeps the temporary names from clashing with any file
    # that is already present in the directory.
    token = uuid.uuid4().hex

    # Pass 1: move everything out of the way.
    staged_paths = []
    for index, file_name in enumerate(file_names):
        staged_path = os.path.join(dir_path, ".rename_tmp_" + token + "_" + str(index))
        os.rename(os.path.join(dir_path, file_name), staged_path)
        staged_paths.append(staged_path)

    # Pass 2: assign the final names; no regular file can be in the way now.
    for index, staged_path in enumerate(staged_paths):
        new_file_path = os.path.join(dir_path, name + '.' + str(index) + '.jpg')
        os.rename(staged_path, new_file_path)


if __name__ == '__main__':
    # Edit here: replace the placeholder with the path of the folder that
    # holds the images to rename.
    target_dir = r"图片所在文件夹的路径"
    rename(target_dir, 'dj')

猜你喜欢

转载自blog.csdn.net/jw2268136570/article/details/100940100
今日推荐