Kettle进阶——Kitchen源码阅读

源代码路径:org.pentaho.di.kitchen.Kitchen

Kitchen是kettle用来启动job的工具,用户可以通过kitchen.sh脚本执行Job任务。现在我们来看Kitchen是如何运行一个Job的。

打开Kitchen源码,进入main()方法,我们首先看到其初始化了一个ExecutorService,该executor的主要作用是初始化kettle环境。

// Executor to which the two initialization tasks below are submitted.

final ExecutorService executor = ExecutorUtil.getExecutor();

final RepositoryPluginType repositoryPluginType = RepositoryPluginType.getInstance();

// Two nested submit() calls initialize the environment.

// The outer task yields an entry that carries either the KettlePluginException raised by

// KettleClientEnvironment.init() (with a null future), or a null key plus the future of the inner task.

final Future<Map.Entry<KettlePluginException, Future<KettleException>>> repositoryRegisterFuture =

executor.submit( new Callable<Map.Entry<KettlePluginException, Future<KettleException>>>() {

@Override

public Map.Entry<KettlePluginException, Future<KettleException>> call() throws Exception {

// Register the repository plugin type before the client environment is initialized.

PluginRegistry.addPluginType( repositoryPluginType );

try {

// Mainly creates the .kettle directory and the kettle.properties file inside it, and loads some basic plugin types

KettleClientEnvironment.init();

} catch ( KettlePluginException e ) {

// The failure is returned as the entry key rather than thrown; no inner task is started.

return new AbstractMap.SimpleImmutableEntry<KettlePluginException, Future<KettleException>>( e, null );

}

// Second stage: submitted from inside the first task, so it starts only after the client init succeeded.

Future<KettleException> kettleEnvironmentInitFuture =

executor.submit( new Callable<KettleException>() {

@Override

public KettleException call() throws Exception {

try {

KettleClientEnvironment.getInstance().setClient( KettleClientEnvironment.ClientType.KITCHEN );

// Loads the extended plugin types and initializes the values of the basic Kettle variables

KettleEnvironment.init();

} catch ( KettleException e ) {

// A KettleException is handed back as the task result instead of being thrown.

return e;

}

// null result means KettleEnvironment.init() completed without a KettleException.

return null;

}

} );

return new AbstractMap.SimpleImmutableEntry<KettlePluginException, Future<KettleException>>

( null, kettleEnvironmentInitFuture );

}

} );

其中KettleClientEnvironment的init()方法的主要作用是:如果用户目录下没有.kettle目录,或该目录下没有kettle.properties文件,则进行创建,同时把kettle.properties中的配置属性加载到系统中。KettleEnvironment的init()方法主要加载并初始化engine包下kettle-variables.xml中配置的属性。

其二,构建用户元信息存储目录。

// Delegating meta store; the local Pentaho meta store is registered with it as a delegate.

// NOTE(review): the article states the local store lives in the .pentaho directory under the user home;

// that location is not visible in this snippet — confirm against MetaStoreConst.

DelegatingMetaStore metaStore = new DelegatingMetaStore();

metaStore.addMetaStore( MetaStoreConst.openLocalPentahoMetaStore() );

// The active meta store name is set to whatever metaStore.getName() returns at this point.

metaStore.setActiveMetaStoreName( metaStore.getName() );

以上三行代码将用户目录下的.pentaho目录加载为元信息存储目录。

接下来第三步是解析用户输入的参数。用户可输入的所有参数如下:

// maxloglines / maxlogtimeout are declared as named locals and appended at the end of the options array.

CommandLineOption maxLogLinesOption =

new CommandLineOption(

"maxloglines", BaseMessages.getString( PKG, "Kitchen.CmdLine.MaxLogLines" ), new StringBuffer() );

CommandLineOption maxLogTimeoutOption =

new CommandLineOption(

"maxlogtimeout", BaseMessages.getString( PKG, "Kitchen.CmdLine.MaxLogTimeout" ), new StringBuffer() );

// Table of every command-line option Kitchen accepts. Each entry pairs the option name with a

// description looked up through BaseMessages and the buffer (or parameter holder) that receives its value;

// most buffers are assigned to the option* variables inline so later code can read them.

// NOTE(review): the meaning of the trailing boolean arguments is not visible here — presumably

// "yes/no flag" and "hidden option"; confirm against the CommandLineOption constructors.

CommandLineOption[] options =

new CommandLineOption[]{

new CommandLineOption( "rep", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepName" ), optionRepname =

new StringBuffer() ),

new CommandLineOption(

"user", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepUsername" ), optionUsername =

new StringBuffer() ),

new CommandLineOption(

"pass", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepPassword" ), optionPassword =

new StringBuffer() ),

new CommandLineOption(

"job", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepJobName" ), optionJobname =

new StringBuffer() ),

new CommandLineOption( "dir", BaseMessages.getString( PKG, "Kitchen.CmdLine.RepDir" ), optionDirname =

new StringBuffer() ),

new CommandLineOption(

"file", BaseMessages.getString( PKG, "Kitchen.CmdLine.XMLJob" ), optionFilename =

new StringBuffer() ),

new CommandLineOption(

"level", BaseMessages.getString( PKG, "Kitchen.CmdLine.LogLevel" ), optionLoglevel =

new StringBuffer() ),

new CommandLineOption(

"logfile", BaseMessages.getString( PKG, "Kitchen.CmdLine.LogFile" ), optionLogfile =

new StringBuffer() ),

new CommandLineOption(

"log", BaseMessages.getString( PKG, "Kitchen.CmdLine.LogFileOld" ), optionLogfileOld =

new StringBuffer(), false, true ),

new CommandLineOption(

"listdir", BaseMessages.getString( PKG, "Kitchen.CmdLine.ListDir" ), optionListdir =

new StringBuffer(), true, false ),

new CommandLineOption(

"listjobs", BaseMessages.getString( PKG, "Kitchen.CmdLine.ListJobsDir" ), optionListjobs =

new StringBuffer(), true, false ),

new CommandLineOption(

"listrep", BaseMessages.getString( PKG, "Kitchen.CmdLine.ListAvailableReps" ), optionListrep =

new StringBuffer(), true, false ),

new CommandLineOption( "norep", BaseMessages.getString( PKG, "Kitchen.CmdLine.NoRep" ), optionNorep =

new StringBuffer(), true, false ),

new CommandLineOption(

"version", BaseMessages.getString( PKG, "Kitchen.CmdLine.Version" ), optionVersion =

new StringBuffer(), true, false ),

// "param" and "custom" write into optionParams / customOptions instead of a StringBuffer;

// both holders are read later via listParameters() / getParameterValue().

new CommandLineOption(

"param", BaseMessages.getString( PKG, "Kitchen.ComdLine.Param" ), optionParams, false ),

new CommandLineOption(

"listparam", BaseMessages.getString( PKG, "Kitchen.ComdLine.ListParam" ), optionListParam =

new StringBuffer(), true, false ),

new CommandLineOption(

"export", BaseMessages.getString( PKG, "Kitchen.ComdLine.Export" ), optionExport =

new StringBuffer(), true, false ),

new CommandLineOption(

"custom", BaseMessages.getString( PKG, "Kitchen.ComdLine.Custom" ), customOptions, false ),

maxLogLinesOption, maxLogTimeoutOption, };

每个参数的作用都有介绍,这里就不多说了。

第四是构建repository。

// This instance uses the contents of repositories.xml under the .kettle folder as the metadata for all repositories

RepositoriesMeta repsinfo = new RepositoriesMeta();

repsinfo.getLog().setLogLevel( log.getLogLevel() );

try {

// Load and parse the contents of repositories.xml

repsinfo.readData();

} catch ( Exception e ) {

// Any failure while reading the repository definitions is wrapped, keeping the original cause.

throw new KettleException( BaseMessages.getString( PKG, "Kitchen.Error.NoRepDefinied" ), e );

}

if ( log.isDebug() ) {

log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.FindingRep", "" + optionRepname ) );

}

// Look up the repository metadata named by the -rep option

repositoryMeta = repsinfo.findRepository( optionRepname.toString() );

// NOTE: this excerpt ends before the closing brace of the following if-block.

if ( repositoryMeta != null ) {

// Define and connect to the repository...

if ( log.isDebug() ) {

log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.Alocate&ConnectRep" ) );

}

// Build the repository instance via reflection (through the plugin registry)

repository =

PluginRegistry.getInstance().loadClass(

RepositoryPluginType.class, repositoryMeta, Repository.class );

repository.init( repositoryMeta );

repository.getLog().setLogLevel( log.getLogLevel() );

// User name and password are passed as null when the corresponding option buffer is null.

repository.connect( optionUsername != null ? optionUsername.toString() : null, optionPassword != null

? optionPassword.toString() : null );

// Check with the repository's security provider that the EXECUTE_JOB operation is permitted.

repository.getSecurityProvider().validateAction( RepositoryOperation.EXECUTE_JOB );

RepositoryDirectoryInterface directory = repository.loadRepositoryDirectoryTree(); // Default = root

// Add the IMetaStore of the repository to our delegation

//

if ( repository.getMetaStore() != null ) {

metaStore.addMetaStore( repository.getMetaStore() );

}

接下来是加载具体Job。具体做法是先根据路径构造jobMeta元信息,然后再根据repository和jobMeta创建Job。需要注意的是,一个Job的xml文件里面包含了job需要执行的每个entry,所以jobMeta也具有解析并实例化每一个entry的能力,具体可参考jobMeta的loadXML()方法。

// Load a job

// Only attempted when a job name was supplied on the command line.

if ( !Const.isEmpty( optionJobname ) ) {

if ( log.isDebug() ) {

log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.LoadingJobInfo" ) );

}

// NOTE(review): presumably waits for the Kettle environment initialization to finish and rethrows

// its failure, if any — blockAndThrow and kettleInitFuture are defined outside this excerpt; confirm.

blockAndThrow( kettleInitFuture );

jobMeta = repository.loadJob( optionJobname.toString(), directory, null, null ); // reads last version

if ( log.isDebug() ) {

log.logDebug( BaseMessages.getString( PKG, "Kitchen.Log.AllocateJob" ) );

}

// The Job is created from the repository together with the metadata just loaded.

job = new Job( repository, jobMeta );

}

最后就是执行Job了。具体代码如下:

// Holds the job's execution result

Result result = null;

int returnCode = 0;

// NOTE: this excerpt shows only the try body; the matching catch/finally is outside the snippet.

try {

// Set the command line arguments on the job ...

if ( args.size() == 0 ) {

job.setArguments( null );

} else {

job.setArguments( args.toArray( new String[args.size()] ) );

}

// Initialize the concrete Job instance

job.initializeVariablesFrom( null );

job.setLogLevel( log.getLogLevel() );

job.getJobMeta().setInternalKettleVariables( job );

job.setRepository( repository );

job.getJobMeta().setRepository( repository );

job.getJobMeta().setMetaStore( metaStore );

// Map the command line named parameters to the actual named parameters. Skip for

// the moment any extra command line parameter not known in the job.

String[] jobParams = jobMeta.listParameters();

for ( String param : jobParams ) {

String value = optionParams.getParameterValue( param );

if ( value != null ) {

job.getJobMeta().setParameterValue( param, value );

}

}

job.copyParametersFrom( job.getJobMeta() );

// Put the parameters over the already defined variable space. Parameters get priority.

//

job.activateParameters();

// Set custom options in the job extension map as Strings

//

for ( String optionName : customOptions.listParameters() ) {

String optionValue = customOptions.getParameterValue( optionName );

// Entries with a null name or a null value are skipped.

if ( optionName != null && optionValue != null ) {

job.getExtensionDataMap().put( optionName, optionValue );

}

}

// List the parameters defined in this job

// Then simply exit...

//

if ( "Y".equalsIgnoreCase( optionListParam.toString() ) ) {

for ( String parameterName : job.listParameters() ) {

String value = job.getParameterValue( parameterName );

String deflt = job.getParameterDefault( parameterName );

String descr = job.getParameterDescription( parameterName );

// The default value is printed only when one is defined; null value/description print as "".

if ( deflt != null ) {

System.out.println( "Parameter: "

+ parameterName + "=" + Const.NVL( value, "" ) + ", default=" + deflt + " : "

+ Const.NVL( descr, "" ) );

} else {

System.out.println( "Parameter: "

+ parameterName + "=" + Const.NVL( value, "" ) + " : " + Const.NVL( descr, "" ) );

}

}

// stop right here...

//

exitJVM( 7 ); // same as the other list options

}

// Execute the selected job and wait for it to finish.

job.start();

job.waitUntilFinished();

result = job.getResult(); // Fetch the result once the job has finished.

}

猜你喜欢

转载自blog.csdn.net/maenlai0086/article/details/81268983
今日推荐