Crawling pictures from xiachufang: a hands-on practice exercise

__author__ = 'Administrator'
# -*- encoding=gbk -*-
import requests
import os
from bs4 import BeautifulSoup
from urllib.parse import urlparse

# Fetch the xiachufang home page, collect the URL of every <img> tag, and
# save each image into an "images" directory under the current directory.
r = requests.get('http://xiachufang.com/')
# Name the parser explicitly so parsing does not depend on which optional
# parser packages happen to be installed.
soup = BeautifulSoup(r.text, 'html.parser')
img_list = []
for img in soup.select('img'):
    # Prefer data-src (presumably the real URL of a lazy-loaded image) and
    # fall back to src; tags carrying neither attribute are skipped instead
    # of raising KeyError.
    if img.has_attr('data-src'):
        img_list.append(img.attrs['data-src'])
    elif img.has_attr('src'):
        img_list.append(img.attrs['src'])
image_dir = os.path.join(os.curdir, 'images')
print(image_dir)
print(os.curdir)
if not os.path.isdir(image_dir):
    os.makedirs(image_dir)
for img in img_list:
    o = urlparse(img)
    # Drop the leading '/' of the URL path and anything after '@'
    # (presumably an image-processing suffix appended by the image host).
    filename = o.path[1:].split('@')[0]
    if not filename:
        # Nothing usable as a file name (e.g. an empty or path-less URL).
        continue
    filepath = os.path.join(image_dir, filename)
    # The URL path may contain sub-directories; make sure they exist so
    # that open() below does not fail.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    # Request the image without its query string and '@' suffix.
    url = img.split('?')[0].split('@')[0]
    print(url)
    resp = requests.get(url)
    with open(filepath, 'wb') as f:
        # Write the body in 1024-byte chunks rather than all at once.
        for chunk in resp.iter_content(1024):
            f.write(chunk)
urllib
    python3 standard library
        parse
        from urllib.request import urlopen
        r=urlopen("http://httpbin.org/get")
        r.read()  # get the binary content of the response
        text = r.read().decode("utf-8")  # decode it to a string; because this site returns JSON content, you can then parse it with json.loads(text)
        r.status  # returns the status code of the request; 200 means OK
        r.reason  # textual description of the status
        dir(r)  # list all attributes and methods of the object
        r.headers # obtain header information
        
            
XPath is a language for finding information in an XML document
concept
    node
        Element, attribute, text, namespace, and document (root) nodes
    Node Relationship
        Parent
        Children
        Siblings
        Ancestors
        Descendants
    expression    
    //         Selects matching nodes anywhere in the document, regardless of their position
    /          Selects from the root node
    .          Selects the current node
    ..         Selects the parent of the current node
    @          Selects attributes

 

Guess you like

Origin www.cnblogs.com/xupanfeng/p/11706108.html