impala jdbc驱动执行impala sql的一个坑（不支持多行sql）

架构使用spark streaming 消费kafka的数据，并通过impala来插入到kudu中，但是通过对比发现落地到kudu表中的数据比kafka消息数要少，通过后台日志发现，偶发性的出现java.sql.SQLException: [Simba][ImpalaJDBCDriver](500051) ERROR processing query/statement. Error Code: 0, SQL state: TStatus(statusCode:ERROR_STATUS, sqlState:HY000, errorMessage:AnalysisException: Syntax error in line 1

原因是调用过程中使用了数据库连接池,会合并多行sql执行，实现如下：

import com.alibaba.druid.pool.DruidDataSource;
import com.alibaba.druid.pool.DruidDataSourceFactory;
import com.alibaba.druid.pool.DruidPooledConnection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.sql.SQLException;
import java.util.Properties;


public class ImapalConnPool {

    private static Log logger = LogFactory.getLog(ImapalConnPool.class);

    private static ImapalConnPool imapalConnPool = null;
    private static DruidDataSource druidDataSource = null;

    static {
        Properties properties = new Properties();
        properties.setProperty("driverClassName","com.cloudera.impala.jdbc41.Driver");
        properties.setProperty("url","jdbc:impala://127.0.0.1:21050");  
        properties.setProperty("username","");
        properties.setProperty("password","");
        properties.setProperty("initialSize","50");
        properties.setProperty("maxActive","100");
        properties.setProperty("maxWait","60000");
        properties.setProperty("timeBetweenEvictionRunsMillis","60000");
        properties.setProperty("minEvictableIdleTimeMillis","300000");
        properties.setProperty("validationQuery","SELECT 1");
        properties.setProperty("testWhileIdle","true");
        properties.setProperty("testOnBorrow","false");
        properties.setProperty("testOnReturn","false");
        properties.setProperty("poolPreparedStatements","false");
        //当该值大于0时，启用pool，poolPreparedStatements为true
        properties.setProperty("maxPoolPreparedStatementPerConnectionSize","-1");

        try {
            druidDataSource = (DruidDataSource) DruidDataSourceFactory.createDataSource(properties); //DruidDataSrouce工厂模式
        } catch (Exception e) {
            logger.error(e);
        }
    }

    public static ImapalConnPool getInstance(){
        if (null == imapalConnPool){
            synchronized(ImapalConnPool.class) {
                if(null == imapalConnPool) {
                    imapalConnPool = new ImapalConnPool();
                }
            }
        }
        return imapalConnPool;
    }

    public DruidPooledConnection getConnection() throws SQLException {
        return druidDataSource.getConnection();
    }

}

调整上述参数poolPreparedStatements和maxPoolPreparedStatementPerConnectionSize任然不能解决同一个connection合并多行sql的问题

后去掉连接池，改为jdbc直连问题解决。有解决过上面数据库连接池问题的麻烦告知我一下

Connection connectionn = null;
Statement statement = null;
try {
    Class.forName("com.cloudera.impala.jdbc41.Driver");
    connectionn = DriverManager.getConnection("jdbc:impala://127.0.0.1:21050");
    statement = connectionn.createStatement();
    for (String item : execSql) {
        statement.execute(item);
    }
    processResult = true;
}catch (Exception ex){
    logger.error(this,ex);
}finally {
    if(null != statement){
        try {
            statement.close();
        }catch (Exception ex){
            logger.error(this,ex);
        }
    }
    if(null != connectionn) {
        try {
            connectionn.close();
        }catch (Exception ex){
            logger.error(this,ex);
        }
    }
}

impala jdbc驱动执行impala sql的一个坑（不支持多行sql）

猜你喜欢