Crawling rental listings from Ganji (ganji.com)

Problems encountered:

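1. List index out of range: indexing an empty XPath result with [0] raises IndexError, so the lookups were later wrapped in a try/except:

   try:
       ...
   except IndexError:
       pass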

A simple crawler

import csv
import os

import requests
from lxml import etree


# Create the CSV output file (it is created automatically if it does not
# exist). newline="" is what the csv module expects, otherwise blank rows
# appear on Windows; an explicit UTF-8 encoding keeps the Chinese header and
# listing titles from failing on platforms whose default codec cannot encode
# them.
f = open("house3.csv", "w+", newline="", encoding="utf-8")

# csv writer object built on top of the output file
csv_file = csv.writer(f)

# Request headers sent with every HTTP request so the site sees a browser
# user agent.
head = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
}
 16  DEF getSource (URL):
 . 17      
18 is      . SourceHtml = Requests GET (URL, headers = head)
 . 19      return sourceHtml.text
 20 is  
# Tracks whether the CSV header row has already been written, so crawling
# several pages in one run does not repeat it.
_header_written = False


def _save_image(img_url, title):
    """Download ``img_url`` and save it as ``<title>.png`` (best effort).

    A failed download is reported and skipped so that one bad image does not
    abort the whole crawl.
    """
    # Listing titles may contain characters that are illegal in file names.
    safe_title = "".join(
        "_" if ch in '\\/:*?"<>|' or ch.isspace() else ch for ch in title
    ).strip("_") or "untitled"
    try:
        r = requests.get(img_url, headers=head, timeout=10)
    except requests.RequestException as exc:
        print("skipping image {!r}: {}".format(img_url, exc))
        return
    # The context manager closes the file even if the write fails.
    with open("{}.png".format(safe_title), "wb") as f1:
        f1.write(r.content)  # write the bytes returned by the server


# Crawl one listing page and parse its data
def spiderData(url):
    """Scrape one rental listing page, saving images and CSV rows.

    For every listing on the page this prints the title, price, address and
    image URL, downloads the listing image to ``<title>.png`` and appends a
    row to the module-level ``csv_file`` writer.

    Args:
        url: Address of the listing page to crawl.
    """
    global _header_written

    rText = getSource(url)
    # Convert the content returned by the server into an element tree.
    html = etree.HTML(rText)
    if html is None:
        # Defensive: nothing parseable came back, so there is nothing to do.
        return

    div_list = html.xpath(
        '//div[contains(@class, "js-tips-list")]'
        '/div[contains(@class, "f-list-item")]'
    )

    if not _header_written:
        csv_file.writerow(["标题", "价格", "地址"])
        _header_written = True

    for item in div_list:
        try:
            title = item.xpath('.//dd[contains(@class, "title")]/a/text()')[0]
            price = item.xpath('.//dd[contains(@class,"info")]/div[@class="price"]/span[@class="num"]/text()')[0]
            yue = item.xpath('.//dd[contains(@class,"info")]/div[@class="price"]/span[@class="yue"]/text()')[0]
            address = item.xpath('.//dd[contains(@class, "address")]//a[@class="address-eara"]/text()')[0]
            # Query relative to this listing; querying the whole document
            # would return the first image of the page for every listing.
            img = item.xpath('.//div[@class="img-wrap"]//img/@src')[0]
        except IndexError:
            # An entry missing any field is skipped entirely instead of
            # falling through with undefined or stale values.
            continue

        print(title, price + yue, address, img)
        _save_image(img, title)

        try:
            csv_file.writerow([title, price + yue, address, img])
        except (UnicodeEncodeError, csv.Error) as exc:
            # Report the row that could not be written rather than hiding it.
            print("could not write row for {!r}: {}".format(title, exc))


def build_page_url(base_url, page):
    """Return the address of listing page number ``page``.

    The address is always derived from the unmodified ``base_url``, so the
    suffix of one page never leaks into the next one.

    Args:
        base_url: Listing root address, ending with a slash.
        page: Page number to request.

    Returns:
        ``base_url`` followed by ``pn<page>/?qq-pf-to=pcqq.group``.
    """
    return "{}pn{}/?qq-pf-to=pcqq.group".format(base_url, page)


if __name__ == '__main__':
    base_url = "http://cs.ganji.com/zufang/b2/"

    # Crawl listing pages 2 to 4.
    for i in range(2, 5):
        spiderData(build_page_url(base_url, i))

 

End ------------ ------------ restore content

Guess you like

Origin www.cnblogs.com/industrial-fd-2019/p/12149262.html