luigi 模板

Luigi 官方文档:https://luigi.readthedocs.io/

import os,sys
import luigi
import luigi.contrib.hdfs
from datetime import datetime, timedelta

class DummyTarget(luigi.Target):
    """Target whose existence is a fixed value chosen at construction time.

    No storage is consulted: ``exists()`` simply reports whatever value was
    handed to the constructor.
    """

    def __init__(self, exist):
        # Kept verbatim; ``exists()`` returns this same object.
        self.exist = exist

    def exists(self):
        """Return the value that was passed to the constructor."""
        return self.exist

class CrawlDataInput(luigi.ExternalTask):
    """External dependency on the HDFS path given by ``input_file``."""

    input_file = luigi.Parameter()

    def output(self):
        """Return the HDFS target for ``input_file`` if it currently exists.

        When the path does not exist at the time of the call, a
        ``DummyTarget(False)`` is returned instead of the HDFS target.
        """
        hdfs_target = luigi.contrib.hdfs.HdfsTarget(self.input_file)
        if hdfs_target.exists():
            return hdfs_target
        return DummyTarget(False)

class CrawlParseBaseData(luigi.Task):
    """Template task for the base parsing step.

    Requires ``CrawlDataInput`` for the path ``done_base_tag`` and declares an
    ``HdfsTarget`` at that same path as its own output.

    NOTE(review): ``done_base_tag`` is not defined in the visible source; it
    has to exist as a module-level name before this task can be scheduled.
    NOTE(review): ``version`` is accepted but not used to build any path
    here -- presumably the tag path should depend on it; confirm.
    """

    version = luigi.Parameter()

    def requires(self):
        """Return the external input task for ``done_base_tag``."""
        return CrawlDataInput(done_base_tag)

    def run(self):
        """Placeholder for the actual parsing job.

        Raises:
            NotImplementedError: always, until the template is filled in.
        """
        # A method needs a body to be valid Python; fail loudly rather than
        # silently "succeeding" without producing the declared output.
        raise NotImplementedError(
            "CrawlParseBaseData.run() is a template placeholder; "
            "implement the parsing job here."
        )

    def output(self):
        """Return the HDFS target located at ``done_base_tag``."""
        return luigi.contrib.hdfs.HdfsTarget(done_base_tag)


class CrawlDataParserTask(luigi.Task):
    """Template task for the final parsing step.

    Requires ``CrawlParseBaseData`` for the same ``version`` and declares an
    ``HdfsTarget`` at ``done_base_tag`` as its output.

    NOTE(review): ``done_base_tag`` is not defined in the visible source; it
    has to exist as a module-level name before this task can be scheduled.
    NOTE(review): the output path is the same ``done_base_tag`` that the
    upstream task uses for its output -- presumably this task should write
    its own marker; confirm.
    """

    version = luigi.Parameter()

    def requires(self):
        """Return the upstream base-parsing task for this ``version``."""
        return CrawlParseBaseData(version=self.version)

    def run(self):
        """Placeholder for the actual parsing job.

        Raises:
            NotImplementedError: always, until the template is filled in.
        """
        # A method needs a body to be valid Python; fail loudly rather than
        # silently "succeeding" without producing the declared output.
        raise NotImplementedError(
            "CrawlDataParserTask.run() is a template placeholder; "
            "implement the parsing job here."
        )

    def output(self):
        """Return the HDFS target located at ``done_base_tag``."""
        return luigi.contrib.hdfs.HdfsTarget(done_base_tag)

if __name__ == '__main__':
    # The version is the local date two days before now, as YYYYMMDD.
    two_days_ago = datetime.now() - timedelta(days=2)
    version_tag = two_days_ago.strftime('%Y%m%d')
    luigi.build([CrawlDataParserTask(version=version_tag)])

发布了557 篇原创文章 · 获赞 500 · 访问量 153万+

猜你喜欢

转载自:https://blog.csdn.net/qq_16234613/article/details/95518751