1.从网站找规律
按 F12 打开开发者工具 --> Network --> XHR,然后把图片页面往下拉,XHR 下会出现新的请求;点击该请求进入 Headers,查看 Request URL,找出 url 的请求规律(例如每翻一页,参数 pn 增加 30)。
2.具体下载代码如下
#!/usr/bin/env python3
# coding=utf-8
"""Download Baidu image-search thumbnails for a keyword.

The script pages through Baidu's ``acjson`` search endpoint, extracts every
``thumbURL`` value from each response, and saves the images as
``baidu<N>.jpg`` in a target directory.
"""
import itertools
import json
import os
import re
import sys
import urllib.parse

# Search endpoint template; ``{word}`` is the URL-quoted keyword and ``{pn}``
# is the offset of the first result on the page (the endpoint is asked for
# 30 results per request via ``rn=30``).
_SEARCH_URL = (
    "https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj"
    "&ct=201326592&is=&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8"
    "&oe=utf-8&adpicid=&st=-1&z=&ic=0&word={word}&s=&se=&tab=&width="
    "&height=&face=0&istype=2&qc=&nc=1&fr=&pn={pn}&rn=30&gsm=78"
    "&1524745446180="
)

# Must match the ``rn=30`` value baked into ``_SEARCH_URL``.
_PAGE_SIZE = 30

# Extracts the image URL from every ``"thumbURL":"..."`` pair in a response.
re_url = re.compile(r'"thumbURL":"(.*?)"')


def buildUrls(word, pages=60):
    """Build the request URLs for successive result pages of ``word``.

    Args:
        word: Search keyword; it is percent-encoded before being inserted
            into the URL.
        pages: Number of result pages to generate URLs for (default 60).

    Returns:
        A lazy generator of URL strings, one per page, whose ``pn`` offset
        advances by 30 each page.
    """
    quoted = urllib.parse.quote(word)
    return (
        _SEARCH_URL.format(word=quoted, pn=page * _PAGE_SIZE)
        for page in range(pages)
    )


def resolveImgUrl(html):
    """Return all ``thumbURL`` values found in the response text ``html``.

    Args:
        html: Decoded body of a search response.

    Returns:
        A list of image URL strings in order of appearance; empty when the
        text contains no ``thumbURL`` entries.
    """
    return re_url.findall(html)


def downImg(imgUrl, dirpath, imgName):
    """Download one image and write it to ``dirpath/imgName``.

    Args:
        imgUrl: URL of the image to fetch.
        dirpath: Existing directory to save into.
        imgName: File name to save the image as.

    Returns:
        True if the image was fetched and written; False if the request
        raised or the server answered with an HTTP error status.
    """
    # Imported here (not at module level) so the pure URL helpers stay
    # importable without the third-party ``requests`` package installed.
    import requests

    filename = os.path.join(dirpath, imgName)
    try:
        res = requests.get(imgUrl, timeout=15)
    except Exception as e:
        # Deliberately broad: one bad image must not abort a long crawl.
        print(" This is Exception:", imgUrl)
        print(e)
        return False
    # Reject server errors (5xx) as well as client errors (4xx); otherwise
    # an error page would be saved to disk under a .jpg name.
    if res.status_code >= 400:
        print(str(res.status_code), ":", imgUrl)
        return False
    with open(filename, "wb") as f:
        f.write(res.content)
    return True


def mkDir(dirName):
    """Create directory ``dirName`` if needed and return its path.

    ``dirName`` is joined onto ``sys.path[0]``; an absolute ``dirName``
    therefore replaces that prefix entirely.

    Args:
        dirName: Directory to create, relative or absolute.

    Returns:
        The joined directory path.
    """
    dirpath = os.path.join(sys.path[0], dirName)
    # makedirs also creates missing parent directories and tolerates an
    # already-existing target, which a plain mkdir does not.
    os.makedirs(dirpath, exist_ok=True)
    return dirpath


def main():
    """Crawl the search result pages for a fixed keyword and save the images."""
    import requests

    word = '鸟类'
    dirpath = mkDir("D:/getdata/down/baidu/" + word)
    index = 0
    for page_url in buildUrls(word):
        print("requesting:", page_url)
        try:
            html = requests.get(page_url, timeout=10).content.decode('utf-8')
        except (requests.RequestException, UnicodeDecodeError) as e:
            # A single failed or undecodable page should not end the run.
            print(" This is Exception:", page_url)
            print(e)
            continue
        imgUrls = resolveImgUrl(html)
        if not imgUrls:
            # No images on this page: the results are exhausted, so stop.
            break
        for img_url in imgUrls:
            if downImg(img_url, dirpath, "baidu" + str(index) + ".jpg"):
                index += 1
                print("Downloaded %s picture" % index)


if __name__ == '__main__':
    main()