Scrapy爬虫框架学习(二)爬取内容导入csv
继续使用《Scrapy爬虫框架学习(一)》中已创建的项目,在其spiders文件夹中新建dmoz_csv.py文件。
下面代码为新建文件的内容
import scrapy
import csv
import codecs
class DmozSpider(scrapy.Spider):
    """Spider that exports every quote tag (text and link) to a CSV file.

    For each response, every ``<a class="tag">`` element found inside a
    ``<div class="tags">`` produces one CSV row made of the element's text
    node(s) followed by its ``href`` attribute value(s).

    The output file is opened once per crawl and shared by all responses.
    Opening it in ``'w'`` mode inside ``parse()`` (which runs once per
    response) would truncate the file each time, leaving only the rows of
    whichever page happened to be processed last.
    """

    name = "dmoz2"
    start_urls = [
        'http://quotes.toscrape.com/page/1/',
        'http://quotes.toscrape.com/page/2/',
    ]

    # Destination of the exported rows; truncated at the start of each crawl.
    csv_path = 'c:/test.csv'

    # Created lazily on the first response and released in closed().
    _csv_file = None
    _csv_writer = None

    def parse(self, response):
        """Write one CSV row per tag link in *response* and echo it to stdout."""
        tags = response.xpath('//div[@class="tags"]/a[@class="tag"]')

        if self._csv_file is None:
            # codecs.open uses binary mode underneath, so the csv module's
            # own '\r\n' line terminator is written through unchanged.
            self._csv_file = codecs.open(self.csv_path, 'w', encoding='utf-8')
            self._csv_writer = csv.writer(self._csv_file)

        for tag in tags:
            # Concatenating the two extracted lists yields [text, href].
            row = tag.xpath('text()').extract() + tag.xpath('@href').extract()
            self._csv_writer.writerow(row)
            print(row)

        # Push this page's rows to disk even if the crawl is interrupted later.
        self._csv_file.flush()

    def closed(self, reason):
        """Close the CSV file when Scrapy reports that the spider has closed."""
        if self._csv_file is not None:
            self._csv_file.close()
            self._csv_file = None
            self._csv_writer = None
在项目所在文件夹中打开cmd命令行窗口
输入:
scrapy crawl dmoz2
#在c盘下面会生成爬取的test.csv文件
文件中爬取的内容为:
friends /tag/friends/page/1/
heartbreak /tag/heartbreak/page/1/
inspirational /tag/inspirational/page/1/
life /tag/life/page/1/
love /tag/love/page/1/
sisters /tag/sisters/page/1/
courage /tag/courage/page/1/
friends /tag/friends/page/1/
simplicity /tag/simplicity/page/1/
understand /tag/understand/page/1/
love /tag/love/page/1/
fantasy /tag/fantasy/page/1/
life /tag/life/page/1/
navigation /tag/navigation/page/1/
activism /tag/activism/page/1/
apathy /tag/apathy/page/1/
hate /tag/hate/page/1/
indifference /tag/indifference/page/1/
inspirational /tag/inspirational/page/1/
love /tag/love/page/1/
opposite /tag/opposite/page/1/
philosophy /tag/philosophy/page/1/
friendship /tag/friendship/page/1/
lack-of-friendship /tag/lack-of-friendship/page/1/
lack-of-love /tag/lack-of-love/page/1/
love /tag/love/page/1/
marriage /tag/marriage/page/1/
unhappy-marriage /tag/unhappy-marriage/page/1/
books /tag/books/page/1/
contentment /tag/contentment/page/1/
friends /tag/friends/page/1/
friendship /tag/friendship/page/1/
life /tag/life/page/1/
fate /tag/fate/page/1/
life /tag/life/page/1/
misattributed-john-lennon /tag/misattributed-john-lennon/page/1/
planning /tag/planning/page/1/
plans /tag/plans/page/1/