# -*- coding: utf-8 -*-
# @Time : 2019/7/1 14:56
#
# Enumerate Baidu Baike entry ids, request http://baike.baidu.com/view/<id>
# for each one and record the outcome in one of three append-mode text files:
#   itemUrl.txt            - final URL of every entry that could be accessed
#   filedItemUrl.txt       - ids whose request failed (e.g. the server dropped
#                            the connection, presumably due to anti-crawling
#                            measures); retry later by re-assembling the URL
#   errorNumberItemUrl.txt - ids whose assembled URL does not lead to an entry
import requests
import time

baseUrl = 'http://baike.baidu.com/view/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/72.0.3626.109 Safari/537.36'
}
batchSize = 300       # number of requests between two pauses
requestTimeout = 10   # seconds; without it a stalled connection blocks forever
countToSleep = batchSize  # requests left before the next pause

# `with` guarantees the three files are flushed and closed even if the crawl
# is interrupted (Ctrl-C) or an unexpected error propagates.
with open("itemUrl.txt", "a+", encoding="utf8") as writer, \
        open("filedItemUrl.txt", "a+", encoding="utf8") as filedWriter, \
        open("errorNumberItemUrl.txt", "a+", encoding="utf8") as errorNumber:
    for i in range(1, 15500000):
        try:
            response = requests.get(baseUrl + str(i), headers=headers,
                                    timeout=requestTimeout)
        except requests.RequestException:
            # Only network-level failures are treated as "try again later";
            # KeyboardInterrupt and programming errors are no longer swallowed.
            filedWriter.write(str(i) + '\n')
            print("server oFF open, reconnect crawling ... ")
            time.sleep(4)
        else:
            # response.url is the URL after redirects; one containing 'error'
            # is treated as "no entry for this id" (presumably Baidu's error
            # page - verify against live responses).
            if 'error' in response.url:
                errorNumber.write(str(i) + '\n')
            else:
                writer.write(response.url + '\n')
                print("first" + str(i) + "th; current URL:" + response.url)

        # Pause after every batch. The counter is checked AFTER the request so
        # that no id is skipped when the pause is due.
        countToSleep -= 1
        if countToSleep <= 0:
            # Persist progress so a crash loses at most one batch of results.
            writer.flush()
            errorNumber.flush()
            filedWriter.flush()
            time.sleep(2)
            print(" rest ------------ ------- ")
            countToSleep = batchSize