In some network environments the Etherscan website (https://etherscan.io/) is not directly accessible, so you may need to configure your network access before running the crawler.
This crawler downloads the source code of the 500 most recently verified smart contracts from the official website, and it is reasonably robust.
After copying the code, modify the file paths in it (the `filepath` values) to match your machine; it can then be run directly.
Python 3.6 or later is recommended as the runtime environment.
# -*- coding: utf8 -*-
# SmartContactSpider.py
"""Crawl recently verified smart-contract source code from etherscan.io.

The script runs in two stages:

1. ``updatescurl`` reads the "contractsVerified" listing pages and writes
   one contract-page URL per line to ``Address.txt``.
2. ``getsccode`` reads ``Address.txt`` and saves each contract's source
   code as ``<address>.sol``.

Edit ``ADDRESS_DIR`` and ``CODE_DIR`` below before running.
"""
import datetime
import os
import re
import time
import traceback

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://etherscan.io"

# Directory holding Address.txt (the list of contract page URLs).
# Both directories must already exist; change them to suit your machine.
ADDRESS_DIR = "C:\\Users\\15321\\Desktop\\SmartContract\\address\\"
# Directory the downloaded .sol files are written to.
CODE_DIR = "C:\\Users\\15321\\Desktop\\SmartContract\\code\\"

# Pose as a desktop browser so the server does not reject the request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'
}


def printtime():
    """Print the current local time as a log prefix, without a newline.

    Returns:
        Always 0.
    """
    print(time.strftime("%Y-%m-%d %H:%M:%S:", time.localtime()), end=' ')
    return 0


def _fetch(url, max_attempts):
    """GET ``url``, pausing 3 seconds after each failed attempt.

    Args:
        url: Address to request.
        max_attempts: Number of attempts before giving up.

    Returns:
        The ``requests`` response of the first successful attempt, or
        ``None`` when every attempt failed.
    """
    for _ in range(max_attempts):
        printtime()
        print('Connecting to URL: ' + url)
        try:
            return requests.get(url, headers=HEADERS, timeout=5)
        except requests.exceptions.ConnectionError:
            printtime()
            print('ConnectionError! Please wait 3 seconds!')
        except requests.exceptions.ChunkedEncodingError:
            printtime()
            print('ChunkedEncodingError! Please wait 3 seconds!')
        except Exception:
            # Deliberately broad so one odd failure does not stop a long
            # crawl; unlike a bare ``except`` it still lets Ctrl-C through.
            printtime()
            print('Unfortunitely,出现未知错误!请等待3秒!')
        # Wait for the network situation to change before trying again.
        time.sleep(3)
    printtime()
    print("Too many failed attempts, check the network environment!")
    return None


def getsccodecore(eachLine):
    """Download the source code of one contract into ``CODE_DIR``.

    Args:
        eachLine: URL of a contract page, as written to Address.txt by
            ``getSCAddress``. Surrounding whitespace (such as the newline
            left over from reading the file) is ignored. The 42 characters
            after the 29-character prefix are used as the file name; this
            presumably matches ``https://etherscan.io/address/<address>``
            -- confirm against the site's current link format.

    Returns:
        0 when the file was written or already existed, 1 when the line
        was empty, the page could not be fetched, or the page contained no
        source-code element.
    """
    url = eachLine.strip()
    if not url:
        return 1

    filepath = CODE_DIR
    filename = url[29:71]
    target = os.path.join(filepath, filename + '.sol')

    # Check before fetching so already-downloaded contracts cost no request.
    if os.path.exists(target):
        printtime()
        print(filename + '已存在!')
        return 0

    response = _fetch(url, 100)
    if response is None:
        return 1

    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")
    targetPRE = soup.find_all('pre', 'js-sourcecopyarea editor')
    if not targetPRE:
        printtime()
        print(filename + ': no contract source code found on the page!')
        return 1

    with open(target, "w", encoding="utf-8") as fo:
        fo.write(targetPRE[0].text)
    printtime()
    print(filename + '新建完成!')
    return 0


def getsccode():
    """Download the source of every contract URL listed in Address.txt.

    Returns:
        0 after all lines were processed, 1 when Address.txt could not be
        opened.
    """
    try:
        SCAddress = open(os.path.join(ADDRESS_DIR, "Address.txt"), "r")
    except OSError:
        printtime()
        print('Error opening the contract address file! '
              'Check that the file directory is correct!')
        return 1

    with SCAddress:
        for eachLine in SCAddress:
            if eachLine.strip():
                # Core step: fetch and save this contract's code.
                getsccodecore(eachLine)
    return 0


def getSCAddress(eachurl, filepath):
    """Append the contract page URLs found on one listing page to Address.txt.

    Args:
        eachurl: URL of a verified-contracts listing page.
        filepath: Directory that holds (or will hold) Address.txt.

    Returns:
        0 on success, 1 when the page could not be fetched or the expected
        table was not found in it (the caller may retry).
    """
    response = _fetch(eachurl, 50)
    if response is None:
        return 1

    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")

    # This div wraps the table whose rows link to the contract pages.
    targetDiv = soup.find_all('div', 'table-responsive mb-2 mb-md-0')
    try:
        targetTBody = targetDiv[0].table.tbody
    except (IndexError, AttributeError):
        targetTBody = None
    if targetTBody is None:
        printtime()
        print("Failed to get targetTBody!")
        return 1

    links = []
    for targetTR in targetTBody:
        # Children of tbody also include whitespace text nodes; skip them.
        if targetTR.name != 'tr' or targetTR.td is None:
            continue
        anchor = targetTR.td.find('a', 'hash-tag text-truncate')
        if anchor is None or 'href' not in anchor.attrs:
            continue
        links.append(BASE_URL + anchor.attrs['href'] + "\n")

    # Append mode: created if missing, otherwise extended, so successive
    # listing pages accumulate in one file.
    with open(os.path.join(filepath, "Address.txt"), "a") as fo:
        fo.writelines(links)
    return 0


def updatescurl():
    """Rebuild Address.txt from the first five verified-contract listing pages.

    Returns:
        0 when the listing pages were processed, 1 when the old
        Address.txt could not be removed.
    """
    # Five pages of 100 entries each.
    urlList = [
        "%s/contractsVerified/%d?ps=100" % (BASE_URL, page)
        for page in range(1, 6)
    ]

    filepath = ADDRESS_DIR
    address_file = os.path.join(filepath, "Address.txt")

    # Remove the previous address list so this run starts from scratch.
    try:
        if os.path.exists(address_file):
            os.remove(address_file)
            printtime()
            print('Cleared the old address file in the %s directory!'
                  % filepath)
    except IOError:
        printtime()
        print("An error that cannot be handled occurred, "
              "terminating: IOError!")
        return 1

    for eachurl in urlList:
        # Give each listing page up to 10 tries before moving on.
        for _ in range(10):
            if getSCAddress(eachurl, filepath) != 1:
                break
    return 0


def main():
    """Refresh the contract address list, then download every contract."""
    updatescurl()
    getsccode()


if __name__ == "__main__":
    main()