煎蛋ooxx

pipelines.py

class Jiandanline(FilesPipeline):
    """Files pipeline that downloads every URL in ``item['file_urls']``.

    Downloaded files are stored below an ``images`` directory inside
    ``settings.FILES_STORE``, and the resulting paths are written back to
    the item under ``'file_paths'``.
    """

    def get_media_requests(self, item, info):
        """Yield one download request per URL listed in ``item['file_urls']``."""
        for file_url in item['file_urls']:
            yield scrapy.Request(file_url)

    def item_completed(self, results, item, info):
        """Store the paths of the successful downloads on the item.

        ``results`` is iterated as ``(ok, value)`` pairs; the ``'path'``
        entry of every pair whose ``ok`` flag is truthy is collected.

        Returns:
            The same item, with ``item['file_paths']`` set.

        Raises:
            DropItem: if no download succeeded.
        """
        file_paths = [x['path'] for ok, x in results if ok]
        if not file_paths:
            raise DropItem("Item contains no files")
        item['file_paths'] = file_paths
        return item

    def file_path(self, request, response=None, info=None):
        """Return the storage path for the file fetched by ``request``.

        The path computed by the parent pipeline is placed under
        ``<FILES_STORE>/images``; that directory is created when missing.
        """
        # Forward the caller's arguments instead of discarding them.
        path = super().file_path(request, response=response, info=info)
        file_store = os.path.join(settings.FILES_STORE, 'images')
        # makedirs(exist_ok=True) avoids the check-then-create race and also
        # works when FILES_STORE itself has not been created yet.
        os.makedirs(file_store, exist_ok=True)
        # NOTE(review): Scrapy documents file_path() as returning a path
        # relative to FILES_STORE; the absolute path is kept here so that the
        # values stored in item['file_paths'] do not change - confirm whether
        # returning os.path.join('images', path) would be preferable.
        return os.path.join(file_store, path)

settings.py

# Register the custom files pipeline by its dotted path; the integer is the
# value Scrapy uses to order enabled pipelines.
ITEM_PIPELINES = {
    'jiandandan.pipelines.Jiandanline': 2,
}

# Base directory for downloads (read by the pipeline as settings.FILES_STORE).
FILES_STORE = r'F:\jiandan'

这算是我第一个成功运行的 Scrapy 爬虫吧，特别开心。

猜你喜欢

转载自www.cnblogs.com/bamboozone/p/10497938.html
今日推荐