Crawling beautiful picture thumbnails with Python:

"""Crawl picture thumbnails from pic.yesky.com.

For each album linked on the listing page, create a folder named after the
album title and download every thumbnail image into it.
"""
import os

# module for sending HTTP requests:
import requests
# module for parsing HTML text:
from bs4 import BeautifulSoup

# directory containing this script:
base_path = os.path.dirname(os.path.abspath(__file__))
# path of the folder that will hold the downloaded pictures:
img1_path = os.path.join(base_path, "img1")
# make sure the base picture folder exists (the per-album mkdir below
# would otherwise fail with FileNotFoundError):
os.makedirs(img1_path, exist_ok=True)

# fetch the listing page:
response = requests.get("http://pic.yesky.com/c/6_20491_1.shtml")
# parse the response body with bs4:
soup = BeautifulSoup(response.text, "html.parser")
# locate the container <div class="lb_box">:
div_obj = soup.find(name="div", attrs={"class": "lb_box"})
# find all <dd> tags inside the div (one per album); result is a list:
list_dd = div_obj.find_all(name="dd")

# loop over each album entry:
for dd in list_dd:
    # the <a> tag holds the album title (text) and detail-page link (href):
    a_obj = dd.find("a")
    # build the per-album folder path from the album title:
    dir_path = os.path.join(img1_path, a_obj.text)
    # create the folder if it does not exist yet:
    if not os.path.isdir(dir_path):
        os.mkdir(dir_path)
    # fetch the album detail page via its href:
    a_response = requests.get(a_obj.get("href"))
    # the site serves GBK-encoded pages; set encoding before reading .text:
    a_response.encoding = "GBK"
    # parse the detail page:
    soup2 = BeautifulSoup(a_response.text, "html.parser")
    # locate the div wrapping the thumbnails
    # NOTE(review): class name was case-mangled by translation in the
    # published source ("Overview") — confirm against the live page markup:
    div_obj2 = soup2.find(name="div", attrs={"class": "overview"})
    # collect every <img> tag inside that div:
    img_list = div_obj2.find_all(name="img")
    for img in img_list:
        # direct URL of the thumbnail image:
        img_src = img.get("src")
        # download the raw image bytes:
        img_response = requests.get(img_src)
        # name the local file after the last path segment of the URL:
        file_path = os.path.join(dir_path, img_src.rsplit("/", 1)[-1])
        # write the bytes to disk in binary mode:
        with open(file_path, "wb") as f:
            f.write(img_response.content)
The result looks like this:

You may also like

Origin www.cnblogs.com/zhang-da/p/12208018.html