A simple crawler for downloading pictures from Baidu Tieba (Post Bar)

import urllib.request
import urllib
from lxml import  etree
import requests
# Scrape one Baidu Tieba listing page and download every lazily-loaded
# thread thumbnail to the desktop as a numbered .jpg file.
url="https://tieba.baidu.com/f?kw=%E6%A1%8C%E9%9D%A2&ie=utf-8&pn=50"
# Browser-like User-Agent sent with the page request.
headers={ 'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

# `headers` must be passed by keyword: the second positional parameter of
# requests.get() is `params`, so a positional dict would be appended to the
# query string instead of being sent as HTTP headers.
response = requests.get(url, headers=headers)
html = etree.HTML(response.text)

# xpath() returns a list of <img> elements found inside the thread media
# containers; an unparsable/empty document yields no elements.
img = []
if html is not None:
    img = html.xpath('//*[@class="threadlist_media j_threadlist_media clearfix"]/li/a/img')

x = 1
for node in img:
    # .attrib is a dict-like view of the tag's attributes; the image link is
    # stored under 'data-original'. Skip tags that do not carry it rather
    # than aborting the whole run with a KeyError.
    link = node.attrib.get('data-original')
    if not link:
        continue
    # Counter is bumped before use, so the first saved file is 2.jpg
    # (numbering kept as in the original script).
    x = x + 1
    # Download the file locally; the .jpg suffix sets the displayed format.
    urllib.request.urlretrieve(link, "C:\\Users\\lenovo\\Desktop\\%s.jpg" % x)
   

 

You might also like

Source: www.cnblogs.com/persistence-ok/p/11440495.html