New Project
[root@localhost pytest]# scrapy startproject iteye886 New Scrapy project 'iteye886', using template directory '/usr/lib64/python2.7/site-packages/scrapy/templates/project', created in: /root/pytest/iteye886 You can start your first spider with: cd iteye886 scrapy genspider example example.com [root@localhost pytest]# cd iteye886/ [root@localhost iteye886]# scrapy genspider myblog 886.iteye.com Created spider 'myblog' using template 'basic' in module: iteye886.spiders.myblog [root@localhost iteye886]# tree . ├──iteye886 │ ├── __init__.py │ ├── __init__.pyc │ ├── items.py │ ├── pipelines.py │ ├── settings.py │ ├── settings.pyc │ └── spiders │ ├── __init__.py │ ├── __init__.pyc │ └── myblog.py └── scrapy.cfg 2 directories, 10 files
Define the fields to be scraped
[root@localhost iteye886]# vim iteye886/items.py 1 # -*- coding: utf-8 -*- 2 3 # Define here the models for your scraped items 4 # 5 # See documentation in: 6 # http://doc.scrapy.org/en/latest/topics/items.html 7 8 import scrapy 9 10 11 class Iteye886Item(scrapy.Item): 12 # define the fields for your item here like: 13 # name = scrapy.Field() 14 title = scrapy.Field()#Set the item to be acquired 15 link = scrapy.Field() 16
Edit the spider code
[root@localhost iteye886]# vim iteye886/spiders/myblog.py
# -*- coding: utf-8 -*-
import scrapy
from iteye886.items import Iteye886Item  # import the item definition


class MyblogSpider(scrapy.Spider):
    """Spider that starts at http://886.iteye.com/ and yields one item per link.

    Each anchor matched by the XPath in ``parse`` becomes one
    ``Iteye886Item`` carrying that anchor's text and ``href``.
    """

    name = "myblog"
    allowed_domains = ["886.iteye.com"]
    start_urls = (
        'http://886.iteye.com/',  # delete www
    )

    def parse(self, response):
        """Yield an ``Iteye886Item`` for every anchor matched on the page.

        :param response: the response object Scrapy passes for a start URL.
        :returns: a generator of ``Iteye886Item`` objects whose ``title`` and
            ``link`` fields hold the result of ``.extract()`` on the anchor's
            text and ``href`` attribute respectively.
        """
        # add the xpath: selects the <a> elements under the h3 headings
        anchors = response.xpath('//*[@id="main"]/div/div[1]/h3/a')
        for anchor in anchors:
            # A fresh item per anchor, so yielded items never share state.
            item = Iteye886Item()
            # Query relative to the matched <a> node itself; querying
            # ``response`` here would ignore the loop variable entirely.
            item['title'] = anchor.xpath('text()').extract()
            item['link'] = anchor.xpath('@href').extract()
            yield item
[root@localhost iteye886]# scrapy list myblog [root@localhost iteye886]# scrapy crawl myblog -o abc.csv [root@localhost iteye886]# cat abc.csv link,title /blog/2324590, centos7-python: interactive interface tab completion /blog/2324577,scrapy-0:centos7 install scrapy ....