Connecting a Spring Boot project to a Hive server and performing CRUD operations

This article explains how to connect to a Hive data warehouse from a Spring Boot project and perform simple create, read, update, and delete (CRUD) operations.

I. Connecting to Hive

1. Add the following dependencies:

    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>1.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-jdbc</artifactId>
    </dependency>
    <dependency>
        <groupId>jdk.tools</groupId>
        <artifactId>jdk.tools</artifactId>
        <version>1.8</version>
        <scope>system</scope>
        <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
    <!-- Druid and fastjson are used by the configuration class and the sample
         code later in this article; the versions shown are only examples -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>druid</artifactId>
        <version>1.1.10</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>

If you hit errors when adding these dependencies, they are most likely version conflicts with your Spring Boot version; switch to matching versions. Set the jdk.tools version to the JDK you actually use. Note the hive-jdbc dependency: it is what we use to run CRUD statements against the Hive warehouse. Druid and fastjson are included because the configuration class and the sample code below depend on them.

2. Configure the yml file

    hive:
      url: jdbc:hive2://<host>:<port>/;auth=noSasl
      driver-class-name: org.apache.hive.jdbc.HiveDriver
      type: com.alibaba.druid.pool.DruidDataSource
      user: <username>
      password: <password>
      # The settings below are additional connection-pool configuration,
      # applied to the data source above.
      # Initial, minimum, and maximum pool sizes
      initialSize: 1
      minIdle: 3
      maxActive: 20
      # Maximum time to wait when acquiring a connection, in milliseconds
      maxWait: 60000
      # Interval between runs that detect and close idle connections, in milliseconds
      timeBetweenEvictionRunsMillis: 60000
      # Minimum time a connection may stay idle in the pool, in milliseconds
      minEvictableIdleTimeMillis: 30000
      validationQuery: select 1
      testWhileIdle: true
      testOnBorrow: false
      testOnReturn: false
      # Enable PSCache and set its size per connection
      poolPreparedStatements: true
      maxPoolPreparedStatementPerConnectionSize: 20
      connectionErrorRetryAttempts: 0
      breakAfterAcquireFailure: true

Replace the <host>, <port>, <username>, and <password> placeholders above with the values for your own Hive warehouse.
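If the connection fails, it can help to rule out the Spring configuration first by opening a bare JDBC connection. The sketch below is a minimal standalone check under assumed values: the host 192.168.1.100 and the hive/empty credentials are hypothetical placeholders, while 10000 is HiveServer2's default port.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class HiveConnectionCheck {
        public static void main(String[] args) throws Exception {
            // Make sure the Hive JDBC driver is registered
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            // Hypothetical host and credentials; 10000 is HiveServer2's default port
            String url = "jdbc:hive2://192.168.1.100:10000/default;auth=noSasl";
            try (Connection conn = DriverManager.getConnection(url, "hive", "");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery("show databases")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }

If this prints your database names, the URL and credentials are correct and any remaining problem lies in the Spring wiring.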

II. Writing the Hive configuration class

    import com.alibaba.druid.pool.DruidDataSource;
    import org.springframework.beans.factory.annotation.Qualifier;
    import org.springframework.boot.context.properties.ConfigurationProperties;
    import org.springframework.context.annotation.Bean;
    import org.springframework.context.annotation.Configuration;
    import org.springframework.jdbc.core.JdbcTemplate;
    import javax.sql.DataSource;

    /**
     * Hive-related configuration class
     * @author zyl
     * @date 2019-04-01
     */
    @Configuration
    @ConfigurationProperties(prefix = "hive")
    public class HiveDruidConfig {

        private String url;
        private String user;
        private String password;
        private String driverClassName;
        private int initialSize;
        private int minIdle;
        private int maxActive;
        private int maxWait;
        private int timeBetweenEvictionRunsMillis;
        private int minEvictableIdleTimeMillis;
        private String validationQuery;
        private boolean testWhileIdle;
        private boolean testOnBorrow;
        private boolean testOnReturn;
        private boolean poolPreparedStatements;
        private int maxPoolPreparedStatementPerConnectionSize;
        private int connectionErrorRetryAttempts;
        private boolean breakAfterAcquireFailure;

@Bean(name = "hiveDruidDataSource")
@Qualifier("hiveDruidDataSource")
public DataSource dataSource() {
    DruidDataSource datasource = new DruidDataSource();
    datasource.setUrl(url);
    datasource.setUsername(user);
    datasource.setPassword(password);
    datasource.setDriverClassName(driverClassName);
    datasource.setInitialSize(initialSize);
    datasource.setMinIdle(minIdle);
    datasource.setMaxActive(maxActive);
    datasource.setMaxWait(maxWait);
    datasource.setTimeBetweenEvictionRunsMillis(timeBetweenEvictionRunsMillis);
    datasource.setMinEvictableIdleTimeMillis(minEvictableIdleTimeMillis);
    datasource.setValidationQuery(validationQuery);
    datasource.setTestWhileIdle(testWhileIdle);
    datasource.setTestOnBorrow(testOnBorrow);
    datasource.setTestOnReturn(testOnReturn);
    datasource.setPoolPreparedStatements(poolPreparedStatements);
    datasource.setMaxPoolPreparedStatementPerConnectionSize(maxPoolPreparedStatementPerConnectionSize);
    datasource.setConnectionErrorRetryAttempts(connectionErrorRetryAttempts);
    datasource.setBreakAfterAcquireFailure(breakAfterAcquireFailure);
    return datasource;
}

@Bean(name = "hiveDruidTemplate")
public JdbcTemplate hiveDruidTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
    return new JdbcTemplate(dataSource);
}

        public int getConnectionErrorRetryAttempts() {
            return connectionErrorRetryAttempts;
        }

        public void setConnectionErrorRetryAttempts(int connectionErrorRetryAttempts) {
            this.connectionErrorRetryAttempts = connectionErrorRetryAttempts;
        }

        public boolean isBreakAfterAcquireFailure() {
            return breakAfterAcquireFailure;
        }

        public void setBreakAfterAcquireFailure(boolean breakAfterAcquireFailure) {
            this.breakAfterAcquireFailure = breakAfterAcquireFailure;
        }

        public String getUrl() {
            return url;
        }

        public void setUrl(String url) {
            this.url = url;
        }

        public String getUser() {
            return user;
        }

        public void setUser(String user) {
            this.user = user;
        }

        public String getPassword() {
            return password;
        }

        public void setPassword(String password) {
            this.password = password;
        }

        public String getDriverClassName() {
            return driverClassName;
        }

        public void setDriverClassName(String driverClassName) {
            this.driverClassName = driverClassName;
        }

        public int getInitialSize() {
            return initialSize;
        }

        public void setInitialSize(int initialSize) {
            this.initialSize = initialSize;
        }

        public int getMinIdle() {
            return minIdle;
        }

        public void setMinIdle(int minIdle) {
            this.minIdle = minIdle;
        }

        public int getMaxActive() {
            return maxActive;
        }

        public void setMaxActive(int maxActive) {
            this.maxActive = maxActive;
        }

        public int getMaxWait() {
            return maxWait;
        }

        public void setMaxWait(int maxWait) {
            this.maxWait = maxWait;
        }

        public int getTimeBetweenEvictionRunsMillis() {
            return timeBetweenEvictionRunsMillis;
        }

        public void setTimeBetweenEvictionRunsMillis(int timeBetweenEvictionRunsMillis) {
            this.timeBetweenEvictionRunsMillis = timeBetweenEvictionRunsMillis;
        }

        public int getMinEvictableIdleTimeMillis() {
            return minEvictableIdleTimeMillis;
        }

        public void setMinEvictableIdleTimeMillis(int minEvictableIdleTimeMillis) {
            this.minEvictableIdleTimeMillis = minEvictableIdleTimeMillis;
        }

        public String getValidationQuery() {
            return validationQuery;
        }

        public void setValidationQuery(String validationQuery) {
            this.validationQuery = validationQuery;
        }

        public boolean isTestWhileIdle() {
            return testWhileIdle;
        }

        public void setTestWhileIdle(boolean testWhileIdle) {
            this.testWhileIdle = testWhileIdle;
        }

        public boolean isTestOnBorrow() {
            return testOnBorrow;
        }

        public void setTestOnBorrow(boolean testOnBorrow) {
            this.testOnBorrow = testOnBorrow;
        }

        public boolean isTestOnReturn() {
            return testOnReturn;
        }

        public void setTestOnReturn(boolean testOnReturn) {
            this.testOnReturn = testOnReturn;
        }

        public boolean isPoolPreparedStatements() {
            return poolPreparedStatements;
        }

        public void setPoolPreparedStatements(boolean poolPreparedStatements) {
            this.poolPreparedStatements = poolPreparedStatements;
        }

        public int getMaxPoolPreparedStatementPerConnectionSize() {
            return maxPoolPreparedStatementPerConnectionSize;
        }

        public void setMaxPoolPreparedStatementPerConnectionSize(int maxPoolPreparedStatementPerConnectionSize) {
            this.maxPoolPreparedStatementPerConnectionSize = maxPoolPreparedStatementPerConnectionSize;
        }

    }

This configuration class really just sets up a DruidDataSource and a JdbcTemplate. Note that @ConfigurationProperties(prefix = "hive") injects the values under the hive node of your yml file into the fields with the same names.
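To use these beans, inject the JdbcTemplate by its bean name. The sketch below is a minimal, hypothetical service (the class name and query are illustrative, not part of the original project):

    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.beans.factory.annotation.Qualifier;
    import org.springframework.jdbc.core.JdbcTemplate;
    import org.springframework.stereotype.Service;
    import java.util.List;
    import java.util.Map;

    @Service
    public class HiveQueryService {

        @Autowired
        @Qualifier("hiveDruidTemplate")
        private JdbcTemplate hiveJdbcTemplate;

        // "show databases" is plain HiveQL, so queryForList works unchanged
        public List<Map<String, Object>> listDatabases() {
            return hiveJdbcTemplate.queryForList("show databases");
        }
    }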

III. Performing CRUD operations

Using JDBC against Hive is essentially the same as using JDBC against MySQL or any other relational database. Here is a sample query class.

    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;
    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.beans.factory.annotation.Qualifier;
    import javax.sql.DataSource;
    import java.sql.*;

    public class Test {
        @Autowired
        @Qualifier("hiveDruidDataSource")
        private DataSource druidDataSource;

        public static String getData(DataSource druidDataSource, String sqlString, String resultName) {
            Connection conn = null;
            Statement statement = null;
            ResultSet rs = null;
            JSONObject result = null;
            try {
                conn = druidDataSource.getConnection();
                statement = conn.createStatement();
                result = new JSONObject();
                result.put("state", "0");
                JSONArray array = new JSONArray();
                rs = statement.executeQuery(sqlString);
                ResultSetMetaData metaData = rs.getMetaData();
                int columnCount = metaData.getColumnCount();
                while (rs.next()) {
                    JSONObject jsonObj = new JSONObject();
                    // Iterate over every column of the current row
                    for (int i = 1; i <= columnCount; i++) {
                        String columnName = metaData.getColumnLabel(i);
                        String value = rs.getString(columnName);
                        jsonObj.put(columnName, value);
                    }
                    array.add(jsonObj);
                }
                // Note: the resultName parameter could be used as the key here
                // instead of the hard-coded "analysisMACResult"
                result.put("analysisMACResult", array.toString());
            } catch (SQLException e) {
                e.printStackTrace();
                return "{\"state\":\"1\"}";
            } finally {
                // Close resources in reverse order of creation, guarding against
                // nulls in case getConnection() or createStatement() failed
                try {
                    if (rs != null) rs.close();
                    if (statement != null) statement.close();
                    if (conn != null) conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                    return "{\"state\":\"1\"}";
                }
            }
            return result.toString();
        }
    }

Look closely at this class: it is no different from plain JDBC against MySQL. Just note that hiveDruidDataSource in @Qualifier("hiveDruidDataSource") is the bean name you defined in the configuration class. The code above is a query; the other operations follow the same pattern (a sketch follows below). That's it for this introduction; corrections are welcome.
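As a hedged sketch of the remaining operations, the method below could be added to the hypothetical HiveQueryService shown earlier; the user_info table and its columns are invented for illustration. One Hive-specific caveat: UPDATE and DELETE statements only work on transactional (ACID) tables, so on a plain table "CRUD" in practice means create, insert, query, and drop.

    // Hypothetical addition to the HiveQueryService sketch above
    public void createInsertDrop() {
        // HiveQL DDL: create a plain (non-transactional) table
        hiveJdbcTemplate.execute("CREATE TABLE IF NOT EXISTS user_info (id INT, name STRING) "
                + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ','");
        // INSERT ... VALUES has been supported since Hive 0.14
        hiveJdbcTemplate.execute("INSERT INTO TABLE user_info VALUES (1, 'tom')");
        hiveJdbcTemplate.execute("DROP TABLE IF EXISTS user_info");
    }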
