"""Download a range of Baidu Tieba listing pages and save them as HTML files."""

import os
import time
import urllib
import urllib.parse
import urllib.request
from urllib import request


def loadPage(url, filename):
    """Send a GET request to *url* and return the raw response body.

    Args:
        url: Full URL of the page to fetch.
        filename: Name the page will be saved under; used only for the
            progress message.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    print('downloading' + filename)
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/65.0.3325.181 Safari/537.36'
    }
    req = urllib.request.Request(url, headers=headers)
    # The context manager closes the connection once the body is read.
    with urllib.request.urlopen(req) as response:
        return response.read()


def writePage(html, filename):
    """Write the downloaded page content to a local file.

    Args:
        html: Page content as ``bytes`` (the file is opened in binary mode).
        filename: Path of the file to create or overwrite.
    """
    print('saving' + filename)
    with open(filename, 'wb') as f:
        f.write(html)
    print('-------------------------------')


# URL pattern of the listing pages (the "pn" offset grows by 50 per page):
#   http://tieba.baidu.com/f?kw=python&fr=ala0&tpl=5     first page
#   http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=0     first page (same content)
#   http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=50    second page
#   http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=100   third page
#   http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=150   fourth page
#   http://tieba.baidu.com/f?kw=python&ie=utf-8&pn=200   fifth page
def tiebaSpider(url, beginPage, endPage, output_dir='D:/yemian'):
    """Download and save every page from *beginPage* to *endPage* inclusive.

    Args:
        url: Base URL that already contains the ``kw=...`` query parameter.
        beginPage: First page number to fetch (1-based).
        endPage: Last page number to fetch (inclusive). Nothing is fetched
            when it is smaller than *beginPage*.
        output_dir: Directory the pages are saved in as ``page_<N>.html``.
            It is created if it does not exist.
    """
    posts_per_page = 50
    for page in range(beginPage, endPage + 1):
        # Page N starts at offset (N - 1) * 50.
        pn = (page - 1) * posts_per_page
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(output_dir, 'page_' + str(page) + '.html')
        fullurl = url + '&pn=' + str(pn)
        html = loadPage(fullurl, filename)
        writePage(html, filename)


def main():
    """Prompt for the forum name and page range, then run the spider."""
    kw = input('Please enter the required crawling pages of Post Bar name:')
    beginPage = int(input('Please enter start:'))
    endPage = int(input('Please enter end page:'))

    url = 'http://tieba.baidu.com/f?'
    # urlencode percent-encodes the forum name so non-ASCII names are valid.
    key = urllib.parse.urlencode({'kw': kw})
    fullurl = url + key
    tiebaSpider(fullurl, beginPage, endPage)

    print('Thank you for using')
    # Pause before exiting so the final message stays visible for 10 seconds.
    time.sleep(10)


if __name__ == '__main__':
    main()