Java web项目创建笔记26 之《整合kettle》

Kettle是一个ETL工具,用于数据的抽取(Extract)、转换(Transform)和加载(Load)。我们可以在Java代码里加载并执行kjb(作业)文件或ktr(转换)文件

1、pom文件添加依赖

<!-- Kettle version shared by the dependencies below.
     NOTE(review): presumably this property is declared inside the pom's <properties> element; confirm. -->
<kettle.version>9.1.0.0-324</kettle.version>
<!-- Kettle core and engine artifacts -->
<dependency>
	<groupId>pentaho-kettle</groupId>
	<artifactId>kettle-core</artifactId>
	<version>${kettle.version}</version>
</dependency>
<dependency>
	<groupId>pentaho-kettle</groupId>
	<artifactId>kettle-engine</artifactId>
	<version>${kettle.version}</version>
</dependency>
<dependency>
	<groupId>org.apache.commons</groupId>
	<artifactId>commons-vfs2</artifactId>
	<version>2.7.0</version>
</dependency>
<!-- Pentaho metastore, versioned together with Kettle via ${kettle.version} -->
<dependency>
	<groupId>pentaho</groupId>
	<artifactId>metastore</artifactId>
	<version>${kettle.version}</version>
</dependency>
<!-- Oracle JDBC driver -->
<dependency>
	<groupId>com.oracle.database.jdbc</groupId>
	<artifactId>ojdbc8</artifactId>
	<version>19.9.0.0</version>
</dependency>
<!-- Remote repositories configured in the pom -->
<repositories>
	<!-- Repository that hosts the Kettle artifacts -->
	<repository>
		<id>pentaho.resolve.repo</id>
		<name>Pentaho Omni</name>
		<!-- NOTE(review): plain-http URL. Maven 3.8.1 and later block external http
		     repositories by default; confirm whether an https endpoint should be used. -->
		<url>http://nexus.pentaho.org/content/groups/omni</url>
		<!-- Release artifacts are resolved from this repository -->
		<releases>
			<enabled>true</enabled>
			<updatePolicy>always</updatePolicy>
		</releases>
		<!-- Snapshot artifacts are not resolved from this repository -->
		<snapshots>
			<enabled>false</enabled>
			<updatePolicy>always</updatePolicy>
		</snapshots>
	</repository>
</repositories>

由于项目maven默认从阿里云拉依赖包,但是阿里云没有kettle相关的包,所以要添加专用的仓库地址
因为用到了oracle数据库,所以加上了oracle驱动包

2、在webapp2_base模块添加KettleUtil.java

package com.study.base.util;

import java.util.Map;

import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.plugins.PluginRegistry;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.repository.filerep.KettleFileRepository;
import org.pentaho.di.repository.filerep.KettleFileRepositoryMeta;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for running Kettle jobs (.kjb) and transformations (.ktr)
 * that are stored in the {@code kettle} directory on the classpath.
 */
public class KettleUtil {

	private static final Logger logger = LoggerFactory.getLogger(KettleUtil.class);

	/** System property Kettle reads to locate plugin folders. */
	private static final String PLUGIN_FOLDERS_PROPERTY = "KETTLE_PLUGIN_BASE_FOLDERS";

	/** Plugin folder used when the system property has not been set externally. */
	private static final String DEFAULT_PLUGIN_FOLDER = "E:/abc";

	/**
	 * Initialises the Kettle environment and opens a file-based repository
	 * rooted at the given classpath directory.
	 * 
	 * @param classpathDIR directory name, resolved through the context class loader
	 * @return an initialised file repository pointing at that directory
	 * @throws KettleException if the directory is not on the classpath or Kettle
	 *                         fails to initialise
	 */
	private static KettleFileRepository kettleInit(String classpathDIR) throws KettleException {
		// Set the plugin directory, but keep a value supplied at startup
		// (-DKETTLE_PLUGIN_BASE_FOLDERS=...) instead of overwriting it.
		// Alternative: StepPluginType.getInstance().getPluginFolders().add(new PluginFolder("E:/abc", false, true));
		if (System.getProperty(PLUGIN_FOLDERS_PROPERTY) == null) {
			System.setProperty(PLUGIN_FOLDERS_PROPERTY, DEFAULT_PLUGIN_FOLDER);
		}
		// Initialise plugins: register the plugin types and load their plugins
		PluginRegistry.init();
		// Initialise the client environment
		KettleEnvironment.init();
		java.net.URL resource = Thread.currentThread().getContextClassLoader().getResource(classpathDIR);
		if (resource == null) {
			// Fail with a descriptive error instead of a bare NullPointerException
			throw new KettleException("Kettle repository directory not found on classpath: " + classpathDIR);
		}
		// NOTE(review): URL#getPath() is URL-encoded, so directories containing
		// spaces or non-ASCII characters may not resolve; confirm deployment paths.
		String basedir = resource.getPath();
		// Repository metadata
		KettleFileRepositoryMeta repoMeta = new KettleFileRepositoryMeta(null, null, null, basedir);
		// File-based repository
		KettleFileRepository fileRepo = new KettleFileRepository();
		fileRepo.init(repoMeta);
		return fileRepo;
	}

	/**
	 * Runs a job and blocks until it has finished.
	 * 
	 * @param initKettleParam variables to set on the job before it starts (read in
	 *                        scripts as <code>${variableName}</code>); may be {@code null}
	 * @param kjbFileName     name of the job to load from the {@code kettle} directory
	 * @return {@code true} if the job finished without errors; {@code false} if it
	 *         reported errors or an exception occurred
	 */
	public static boolean runKettleJob(Map<String, String> initKettleParam, String kjbFileName) {
		logger.info("runKettleJob: {}", kjbFileName);
		try {
			// Open the repository first: the kettle directory under resources
			KettleFileRepository fileRepo = kettleInit("kettle");
			// Then load the kjb file
			JobMeta jobMeta = fileRepo.loadJob(kjbFileName, null, null, null);
			Job job = new Job(null, jobMeta);
			// Initialise job variables
			if (initKettleParam != null) {
				for (Map.Entry<String, String> variable : initKettleParam.entrySet()) {
					job.setVariable(variable.getKey(), variable.getValue());
				}
			}
			// Start the job
			job.start();
			// Wait for the job to finish
			job.waitUntilFinished();
			if (job.getErrors() > 0) {
				logger.error("runKettleJob执行失败: {}", kjbFileName);
				return false;
			}
			logger.info("runKettleJob执行成功: {}", kjbFileName);
			return true;
		} catch (Exception e) {
			// The file name fills the placeholder; the trailing exception is logged with its stack trace
			logger.error("runKettleJob: {}", kjbFileName, e);
			return false;
		}
	}

	/**
	 * Runs a transformation and blocks until it has finished.
	 * 
	 * @param initKettleParam variables to set on the transformation before it runs
	 *                        (read in scripts as <code>${variableName}</code>); may be {@code null}
	 * @param ktrFileName     name of the transformation to load from the {@code kettle} directory
	 * @return {@code true} if the transformation finished without errors;
	 *         {@code false} if it reported errors or an exception occurred
	 */
	public static boolean runKettleTransfer(Map<String, String> initKettleParam, String ktrFileName) {
		logger.info("runKettleTransfer: {}", ktrFileName);
		try {
			// Open the repository first: ktr files live in the kettle directory under resources
			KettleFileRepository fileRepo = kettleInit("kettle");
			// Then load the ktr file
			TransMeta transMeta = fileRepo.loadTransformation(ktrFileName, null, null, false, null);
			Trans trans = new Trans(transMeta);
			// Initialise transformation variables
			if (initKettleParam != null) {
				for (Map.Entry<String, String> variable : initKettleParam.entrySet()) {
					trans.setVariable(variable.getKey(), variable.getValue());
				}
			}
			// Run the transformation
			trans.execute(null);
			// Wait for the transformation to finish
			trans.waitUntilFinished();
			if (trans.getErrors() > 0) {
				logger.error("runKettleTransfer执行失败: {}", ktrFileName);
				return false;
			}
			logger.info("runKettleTransfer执行成功: {}", ktrFileName);
			return true;
		} catch (Exception e) {
			// The file name fills the placeholder; the trailing exception is logged with its stack trace
			logger.error("runKettleTransfer: {}", ktrFileName, e);
			return false;
		}
	}

}

3、在webapp2_web模块添加测试类KettleTest.java

package webapp;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;

import com.study.base.util.KettleUtil;

/**
 * Integration test that runs the {@code test1} transformation through
 * {@link KettleUtil} inside the Spring test context ("dev" profile).
 */
@RunWith(SpringJUnit4ClassRunner.class)
@WebAppConfiguration
@ContextConfiguration(locations = { "classpath:applicationContext.xml", "classpath:spring-mvc.xml" })
@ActiveProfiles("dev")
public class KettleTest {

	/**
	 * Runs the test1 transformation without extra variables.
	 * KettleUtil.runKettleTransfer catches every exception and returns false,
	 * so the result has to be checked explicitly or this test could never fail.
	 */
	@Test
	public void ktrTest() {
		boolean finished = KettleUtil.runKettleTransfer(null, "test1");
		if (!finished) {
			throw new AssertionError("runKettleTransfer returned false for transformation test1");
		}
	}
}

4、在webapp2_web模块src/main/resources下
1)建立kettle目录,将test1.ktr复制进去
2)添加kettle-password-encoder-plugins.xml

<!-- Registers the password encoder plugin with id "Kettle", implemented by
     KettleTwoWayPasswordEncoder. Kettle looks this file up on the classpath,
     which is why it is placed under src/main/resources. -->
<password-encoder-plugins>
	<password-encoder-plugin id="Kettle">
		<description>Kettle Password Encoder</description>
		<classname>org.pentaho.di.core.encryption.KettleTwoWayPasswordEncoder</classname>
	</password-encoder-plugin>
</password-encoder-plugins>

将插件文件夹kettle-json-plugin复制到指定目录E:/abc
执行测试方法:

2021-01-27 10:28:36.118 [] [] [] [] INFO  [main] com.study.base.util.KettleUtil [93]:runKettleTransfer: test1
2021-01-27 10:28:36.941 [] [] [] [] INFO  [main] org.pentaho.di.trans.Trans [98]:[test1.ktr]  Dispatching started for transformation [test1]
2021-01-27 10:28:55.406 [] [] [] [] INFO  [test1 - 抽取交易流水表] org.pentaho.di.trans.Trans [98]:[test1.ktr]  Finished reading query, closing connection.
2021-01-27 10:28:55.423 [] [] [] [] INFO  [test1 - 抽取交易流水表] org.pentaho.di.trans.Trans [98]:[test1.ktr]  Finished processing (I=20122, O=0, R=0, W=20122, U=0, E=0)
2021-01-27 10:28:55.492 [] [] [] [] INFO  [test1 - 输出json格式] org.pentaho.di.trans.Trans [98]:[test1.ktr]  Finished processing (I=0, O=20122, R=20122, W=20122, U=0, E=0)
2021-01-27 10:28:55.495 [] [] [] [] INFO  [main] com.study.base.util.KettleUtil [115]:runKettleTransfer执行成功: test1

5、遇到的坑

报错:
org.pentaho.di.core.exception.KettleException: 
Unable to find plugin with ID 'Kettle'.  If this is a test, make sure kettle-core tests jar is a dependency.  If this is live make sure a kettle-password-encoder-plugins.xml exits in the classpath
解决:
添加kettle-password-encoder-plugins.xml

报错:
Can't run transformation due to plugin missing
Error initializing step [输出json格式]
An error occurred, processing will be stopped: 
Error occurred while trying to connect to the database
Driver class 'oracle.jdbc.driver.OracleDriver' could not be found, make sure the 'Oracle' driver (jar file) is installed.
oracle.jdbc.driver.OracleDriver
解决:
添加oracle驱动包

报错:
Can't run transformation due to plugin missing
Error initializing step [输出json格式]
Step [输出json格式.0] failed to initialize!
解决:
这个转换需要将数据输出成json格式,因此需要添加对应的插件(kettle-json-plugin)。插件可以从本地PDI安装目录的plugins文件夹中获取,比如:E:\pdi-ce-9.1.0.0-324\data-integration\plugins

扫描二维码关注公众号,回复: 12899196 查看本文章

6、有三种方式加载插件
1)启动参数设置插件目录
-DKETTLE_PLUGIN_BASE_FOLDERS=E:/abc
2)代码设置系统变量
System.setProperty("KETTLE_PLUGIN_BASE_FOLDERS",
                "E:/abc");
3)直接加载插件
StepPluginType.getInstance().getPluginFolders().
        add(new PluginFolder("E:/abc", false, true));

参考资料:
https://blog.csdn.net/qq_32448349/article/details/87863746
https://blog.csdn.net/bluebelfast/article/details/43192995
https://segmentfault.com/a/1190000020998920
https://blog.csdn.net/d6619309/article/details/50654355

注:最新代码上传至https://github.com/csj50/webapp2.git
 

猜你喜欢

转载自blog.csdn.net/csj50/article/details/113180483
今日推荐