__author__ = 'Administrator'
# -*- encoding=gbk -*-
#
# Fetch the xiachufang.com front page, collect the URL of every <img> tag on
# it, and save each image into an "images" directory under the current
# working directory.

import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

r = requests.get('http://xiachufang.com/')
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified"
# warning).
soup = BeautifulSoup(r.text, 'html.parser')

img_list = []
for img in soup.select('img'):
    # Prefer data-src when the tag has one, otherwise fall back to src.
    # Tags that carry neither attribute are skipped instead of raising
    # KeyError.
    src = img.get('data-src') or img.get('src')
    if src:
        # Resolve relative and protocol-relative URLs against the page URL;
        # absolute URLs pass through unchanged.
        img_list.append(urljoin(r.url, src))

image_dir = os.path.join(os.curdir, 'images')
print(image_dir)
print(os.curdir)
os.makedirs(image_dir, exist_ok=True)

for img in img_list:
    # Download URL: drop the query string and everything from the first '@'.
    url = img.split('?')[0].split('@')[0]

    # Local file name: the last segment of the URL path, again cut at the
    # first '@'. Taking only the last segment keeps every file directly
    # inside image_dir even when the URL path contains sub-directories.
    filename = os.path.basename(urlparse(img).path.split('@')[0])
    if not filename:
        # Nothing usable to name the file after (e.g. the path is just "/").
        continue
    filepath = os.path.join(image_dir, filename)

    print(url)
    resp = requests.get(url)
    with open(filepath, 'wb') as f:
        # Write the body to disk in 1024-byte blocks.
        for chunk in resp.iter_content(1024):
            f.write(chunk)
urllib is part of the Python 3 standard library; URL parsing lives in its urllib.parse module.

from urllib.request import urlopen
r = urlopen("http://httpbin.org/get")
r.read()  # get the binary content of the response
text = r.read().decode("utf-8")  # decode it into a string; because this site returns JSON, you can then parse it with json.loads(text)
r.status  # the status code of the request; 200 means OK
r.reason  # a textual description of the status
dir(r)  # list all attributes and methods the object has
r.headers  # get the header information

XPath is a language for finding information in an XML document.

Concepts
Node types: element, attribute, text, comment, namespace, document (root) node.

Node relationships
Parent
Children
Siblings
Ancestors
Descendants

Expressions
//  selects matching nodes anywhere in the document, at any depth
/   selects from the root node
.   selects the current node
..  selects the parent of the current node
@   selects attributes