# Today's target: crawl the three page levels of "The Grave Robbers' Chronicles" with Scrapy

Today we crawl the novel *Daomubiji* (The Grave Robbers' Chronicles). Analysis
shows the novel's content is spread across three page levels — volume list →
chapter list → chapter text — so we need to parse each level in turn.

## Code implementation

### daomu.py

```python
import scrapy

from ..items import DaomuItem


class DaomuSpider(scrapy.Spider):
    """Three-level spider: volume links -> chapter links -> chapter content."""

    name = 'daomu'
    allowed_domains = ['daomubiji.com']
    start_urls = ['http://www.daomubiji.com/']

    def parse(self, response):
        """Level 1: extract the link of every volume from the site menu."""
        # link_list: ['http://xxx/dao-mu-bi-ji-1', ...]
        link_list = response.xpath('//ul[@class="sub-menu"]/li/a/@href').extract()
        for link in link_list:
            # hand each volume request back to the scheduler
            yield scrapy.Request(url=link, callback=self.parse_two_html)

    def parse_two_html(self, response):
        """Level 2: extract volume name, chapter number, chapter name, link."""
        # base xpath: one <article> node per chapter entry
        article_list = response.xpath('//article')
        for article in article_list:
            item = DaomuItem()
            # info_list: e.g. [volume name, chapter number, chapter name];
            # some entries omit the chapter number, leaving only two fields
            info_list = article.xpath('./a/text()').get().split()
            if len(info_list) == 3:
                item['volume_name'] = info_list[0]
                item['zh_num'] = info_list[1]
                item['zh_name'] = info_list[2]
            else:
                item['volume_name'] = info_list[0]
                item['zh_name'] = info_list[1]
                item['zh_num'] = ''
            # extract the chapter link and hand it to the scheduler queue
            item['zh_link'] = article.xpath('./a/@href').get()
            yield scrapy.Request(
                url=item['zh_link'],
                # meta: pass the item object along to the next parse function
                meta={'item': item},
                callback=self.parse_three_html,
            )

    def parse_three_html(self, response):
        """Level 3: extract the chapter's text content."""
        # recover the item object passed from the previous parse function
        item = response.meta['item']
        # content_list: ['paragraph 1', 'paragraph 2', ...]
        content_list = response.xpath(
            '//article[@class="article-content"]//p/text()'
        ).extract()
        item['zh_content'] = '\n'.join(content_list)
        yield item
```

### items.py

```python
import scrapy


class DaomuItem(scrapy.Item):
    """Container for one chapter of the novel."""

    # volume name
    volume_name = scrapy.Field()
    # chapter number
    zh_num = scrapy.Field()
    # chapter name
    zh_name = scrapy.Field()
    # chapter link
    zh_link = scrapy.Field()
    # novel content
    zh_content = scrapy.Field()
```

### pipelines.py

```python
class DaomuPipeline(object):
    """Write each chapter to its own file on disk."""

    def process_item(self, item, spider):
        filename = '/home/tarena/daomu/{}_{}_{}'.format(
            item['volume_name'], item['zh_num'], item['zh_name']
        )
        # chapter text is Chinese, so pin the encoding explicitly
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(item['zh_content'])
        return item
```