Hadoop Big Data Ecosystem: Integrating Hive with Spring Boot and Operating Hive through Spring's JdbcTemplate

Introduction

  1. Connect to the Hive client from a development tool, integrate Spring Boot with Hive, and run inserts, deletes, queries, and updates;
  2. Create the Hive connection, hand it to a Druid connection pool, and autowire a JdbcTemplate;
  3. Prerequisite: a Hive client is already installed; see the earlier post: Alibaba Cloud ECS7 setup: installing the hive-2.1.1 client;
  4. Versions used: hadoop 2.7.6, hive 1.3.3.

Implementation

  • 1. First, add the Hadoop and Hive dependencies to the pom file;
<!-- Hadoop dependencies -->
<dependency>
	<groupId>org.apache.hadoop</groupId>
	<artifactId>hadoop-common</artifactId>
	<version>2.6.0</version>
</dependency>

<dependency>
	<groupId>org.apache.hadoop</groupId>
	<artifactId>hadoop-mapreduce-client-core</artifactId>
	<version>2.6.0</version>
</dependency>

<dependency>
	<groupId>org.apache.hadoop</groupId>
	<artifactId>hadoop-mapreduce-client-common</artifactId>
	<version>2.6.0</version>
</dependency>

<dependency>
	<groupId>org.apache.hadoop</groupId>
	<artifactId>hadoop-hdfs</artifactId>
	<version>2.6.0</version>
</dependency>

<dependency>
	<groupId>jdk.tools</groupId>
	<artifactId>jdk.tools</artifactId>
	<version>1.8</version>
	<scope>system</scope>
	<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
</dependency>

<dependency>
	<groupId>org.springframework.boot</groupId>
	<artifactId>spring-boot-configuration-processor</artifactId>
	<optional>true</optional>
</dependency>

<!-- Hive dependency -->
<dependency>
	<groupId>org.apache.hive</groupId>
	<artifactId>hive-jdbc</artifactId>
	<version>2.1.1</version>
	<exclusions>
		<exclusion>
			<groupId>org.eclipse.jetty.aggregate</groupId>
			<artifactId>*</artifactId>
		</exclusion>
	</exclusions>
</dependency>
  • 2. Add the data source properties to the yml configuration file;
# Configure multiple data sources (here we only test the Hive data source)
spring:
  datasource:
    hive: # Hive data source
      url: jdbc:hive2://47.100.200.200:9019/default
      type: com.alibaba.druid.pool.DruidDataSource
      username: sixmonth
      password: sixmonth
      driver-class-name: org.apache.hive.jdbc.HiveDriver
    commonConfig: # shared pool settings, applied to every data source
      initialSize: 1
      minIdle: 1
      maxIdle: 5
      maxActive: 50
      maxWait: 10000
      timeBetweenEvictionRunsMillis: 10000
      minEvictableIdleTimeMillis: 300000
      validationQuery: select 'x'
      testWhileIdle: true
      testOnBorrow: false
      testOnReturn: false
      poolPreparedStatements: true
      maxOpenPreparedStatements: 20
      filters: stat
  • 3. Create a unified property configuration class: DataSourceProperties;
package com.springboot.sixmonth.common.config.druid;

import java.util.Map;

import org.springframework.boot.context.properties.ConfigurationProperties;

/**
 * Unified property holder that binds the data source settings from the configuration file.
 * @author sixmonth
 * @Date May 18, 2019
 *
 */
@ConfigurationProperties(prefix = DataSourceProperties.DS, ignoreUnknownFields = false)
public class DataSourceProperties {
	final static String DS = "spring.datasource";

	private Map<String, String> hive;

	private Map<String, String> commonConfig;

	public Map<String, String> getHive() {
		return hive;
	}

	public void setHive(Map<String, String> hive) {
		this.hive = hive;
	}

	public Map<String, String> getCommonConfig() {
		return commonConfig;
	}

	public void setCommonConfig(Map<String, String> commonConfig) {
		this.commonConfig = commonConfig;
	}

}
  • 4. Create a pool-wide property class whose settings can be applied to every data source: DataSourceCommonProperties;
package com.springboot.sixmonth.common.config.druid;

import org.springframework.boot.context.properties.ConfigurationProperties;

/**
 * Extended pool configuration: shared settings that can be applied to every data source.
 * @author sixmonth
 * @Date May 18, 2019
 *
 */
@ConfigurationProperties(prefix = DataSourceCommonProperties.DS, ignoreUnknownFields = false)
public class DataSourceCommonProperties {
	final static String DS = "spring.datasource.commonConfig";

	private int initialSize = 10;
	private int minIdle;
	private int maxIdle;
	private int maxActive;
	private int maxWait;
	private int timeBetweenEvictionRunsMillis;
	private int minEvictableIdleTimeMillis;
	private String validationQuery;
	private boolean testWhileIdle;
	private boolean testOnBorrow;
	private boolean testOnReturn;
	private boolean poolPreparedStatements;
	private int maxOpenPreparedStatements;
	private String filters;

	private String mapperLocations;
	private String typeAliasPackage;

	/* Getters and setters are omitted here to save space; add them yourself. */
	
}
  • 5. Create the Hive data source configuration class that loads the bound properties and registers the bean: HiveDruidConfig (an optional JdbcTemplate-bean variation follows the class);
package com.springboot.sixmonth.common.config.druid.hive;

import java.sql.SQLException;

import javax.sql.DataSource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import com.alibaba.druid.pool.DruidDataSource;
import com.springboot.sixmonth.common.config.druid.DataSourceCommonProperties;
import com.springboot.sixmonth.common.config.druid.DataSourceProperties;

/**
 * Configures the Hive data source.
 * @author sixmonth
 * @Date May 18, 2019
 *
 */
@Configuration
@EnableConfigurationProperties({DataSourceProperties.class, DataSourceCommonProperties.class}) // register the property classes so their @ConfigurationProperties bindings take effect
public class HiveDruidConfig {

	private static Logger logger = LoggerFactory.getLogger(HiveDruidConfig.class);

	@Autowired
	private DataSourceProperties dataSourceProperties;

	@Autowired
	private DataSourceCommonProperties dataSourceCommonProperties;

	@Bean("hiveDruidDataSource") // register the bean instance
	@Qualifier("hiveDruidDataSource") // qualifier so the bean can be injected by name
	public DataSource dataSource() {
		DruidDataSource datasource = new DruidDataSource();

		// data source properties
		datasource.setUrl(dataSourceProperties.getHive().get("url"));
		datasource.setUsername(dataSourceProperties.getHive().get("username"));
		datasource.setPassword(dataSourceProperties.getHive().get("password"));
		datasource.setDriverClassName(dataSourceProperties.getHive().get("driver-class-name"));

		// shared pool properties
		datasource.setInitialSize(dataSourceCommonProperties.getInitialSize());
		datasource.setMinIdle(dataSourceCommonProperties.getMinIdle());
		datasource.setMaxActive(dataSourceCommonProperties.getMaxActive());
		datasource.setMaxWait(dataSourceCommonProperties.getMaxWait());
		datasource.setTimeBetweenEvictionRunsMillis(dataSourceCommonProperties.getTimeBetweenEvictionRunsMillis());
		datasource.setMinEvictableIdleTimeMillis(dataSourceCommonProperties.getMinEvictableIdleTimeMillis());
		datasource.setValidationQuery(dataSourceCommonProperties.getValidationQuery());
		datasource.setTestWhileIdle(dataSourceCommonProperties.isTestWhileIdle());
		datasource.setTestOnBorrow(dataSourceCommonProperties.isTestOnBorrow());
		datasource.setTestOnReturn(dataSourceCommonProperties.isTestOnReturn());
		datasource.setPoolPreparedStatements(dataSourceCommonProperties.isPoolPreparedStatements());
		try {
			datasource.setFilters(dataSourceCommonProperties.getFilters());
		} catch (SQLException e) {
			logger.error("Druid configuration initialization filter error.", e);
		}
		return datasource;
	}

}
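As a small variation that is not part of the original post, the JdbcTemplate itself could be exposed as a bean inside HiveDruidConfig instead of being wrapped in a base DAO (which step 6 below does). A minimal sketch, assuming the bean name hiveJdbcTemplate and an additional import of org.springframework.jdbc.core.JdbcTemplate:

// Hypothetical alternative inside HiveDruidConfig: register the template directly.
@Bean("hiveJdbcTemplate")
public JdbcTemplate hiveJdbcTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
	return new JdbcTemplate(dataSource);
}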
  • 6. Create a JdbcTemplate wrapper class that autowires the Hive data source: HiveJdbcBaseDaoImpl;
package com.springboot.sixmonth.common.config.jdbcConfig;

import javax.sql.DataSource;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Repository;

/**
 * Base DAO that wires the Hive data source into a JdbcTemplate.
 * @author sixmonth
 * @Date May 18, 2019
 *
 */
@Repository
public class HiveJdbcBaseDaoImpl {
	
	private JdbcTemplate jdbcTemplate;

	public JdbcTemplate getJdbcTemplate() {
		return jdbcTemplate;
	}

	@Autowired
	public void setJdbcTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
		this.jdbcTemplate = new JdbcTemplate(dataSource);
	}
	
}
  • 7. Create a test DAO class that extends HiveJdbcBaseDaoImpl and queries the Hive database with HQL: TestHiveDao (a write-side sketch follows the class);
package com.springboot.sixmonth.dao.jdbcDao.test;

import org.springframework.stereotype.Repository;
import com.springboot.sixmonth.common.config.jdbcConfig.HiveJdbcBaseDaoImpl;

/**
 * Tests the Hive connection.
 * @author sixmonth
 * @Date May 18, 2019
 *
 */
@Repository
public class TestHiveDao extends HiveJdbcBaseDaoImpl{

	/**
	 * Fetches a sample record from the Hive database.
	 * @return the first name in the test table
	 */
	public String test() {
		String sql = "SELECT name from sixmonth limit 1";
		String param = this.getJdbcTemplate().queryForObject(sql,String.class);
		return param;
	}
	
}
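The introduction also promises inserts, deletes, and updates, not just queries. Below is a minimal write-side sketch under some assumptions: the table sixmonth with a single STRING column name is carried over from the query above, TestHiveWriteDao is a hypothetical class name, and row-level UPDATE/DELETE additionally requires Hive's ACID/transaction support, which is off by default.

package com.springboot.sixmonth.dao.jdbcDao.test;

import org.springframework.stereotype.Repository;

import com.springboot.sixmonth.common.config.jdbcConfig.HiveJdbcBaseDaoImpl;

/**
 * Hypothetical write-side companion to TestHiveDao.
 */
@Repository
public class TestHiveWriteDao extends HiveJdbcBaseDaoImpl {

	/**
	 * Creates the test table if it does not exist; plain DDL goes through execute().
	 */
	public void createTable() {
		this.getJdbcTemplate().execute("CREATE TABLE IF NOT EXISTS sixmonth (name STRING)");
	}

	/**
	 * Inserts one row. Hive 0.14+ accepts INSERT INTO ... VALUES; the driver
	 * may report the affected-row count as 0.
	 */
	public void insert(String name) {
		this.getJdbcTemplate().update("INSERT INTO sixmonth VALUES (?)", name);
	}

}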
  • 8. Usage: autowire TestHiveDao and call its methods directly; a minimal caller sketch follows the snippet.
@Autowired
private TestHiveDao testHiveDao;
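For completeness, a minimal caller sketch; the service class HiveTestService, its package, and its method name are hypothetical:

package com.springboot.sixmonth.service.test;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.springboot.sixmonth.dao.jdbcDao.test.TestHiveDao;

/**
 * Hypothetical service that exercises the Hive DAO.
 */
@Service
public class HiveTestService {

	@Autowired
	private TestHiveDao testHiveDao;

	public String fetchSample() {
		// Delegates to the DAO, which runs the HQL through the pooled Hive connection.
		return testHiveDao.test();
	}

}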

Notes

  1. The username and password for the Hive connection can be customized when installing Hive; see my previous post: Hadoop Big Data Ecosystem: Setting a Custom Username and Password for Hive Connections.
  2. When operating the Hive data source through JdbcTemplate, essentially all of the HQL you will need is supported; test it yourself.
  3. Hive differs from relational databases: relational databases are designed for real-time query workloads, while Hive is designed for data mining over massive data sets, so its latency is high and it is generally not used for real-time queries. If you need real-time queries on top of it, look into integrating Spark.

Summary

  1. Spring Boot can integrate many kinds of data sources. This post only covers configuring a Hive data source; for multi-data-source setups, see another of my posts: Spring Boot Multi-Data-Source Configuration (hive/mysql).
  2. Practice is the sole criterion for testing truth; roll up your sleeves and do it yourself~~

Reposted from blog.csdn.net/alan_liuyue/article/details/90314676