thriftserver

export SPARK_CONF_DIR=/home/yunzhi.lyz/spark/spark220/conf

spark-defaults.conf:

spark.yarn.jars    hdfs:/app/jars/*.jar    # jars live on HDFS; at each submit, jars under spark/jars that match the HDFS copies are taken from HDFS (no re-upload), with the driver-side jars taking priority
spark.app.confpath /udfconf/udf.config     # UDF config file on HDFS (custom property, read by the patched thriftserver below)
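
The config file holds one entry per line in the form name,className[,returnType]: two fields describe a UDAF, three fields a plain UDF. A hypothetical udf.config (the class names are placeholders, matching the example classes further down):

strlen,com.example.udf.StrLenUDF,IntegerType
long_sum,com.example.udaf.LongSumUDAF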

start-thriftserver.sh must be launched in client mode, and in client mode the driver-side classes have to be loaded via --driver-class-path $CLASSPATH:/home/...SNAPSHOT.jar.

Jars added with --driver-class-path end up after java -cp in the final java command; that is how the driver JVM gets to load them.

spark.driver.extraClassPath has no effect in client mode.
For the executors, the spark.executor.extraClassPath setting on the workers does the job; testing showed --jars works as well.
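
Putting the notes above together, the launch looks roughly like this; the jar path is a placeholder for your own build:

start-thriftserver.sh \
  --master yarn \
  --deploy-mode client \
  --driver-class-path $CLASSPATH:/path/to/your-udfs-SNAPSHOT.jar \
  --jars /path/to/your-udfs-SNAPSHOT.jar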

The registration itself lives in UdfLoadUtils: it reads the config file from spark.app.confpath once, caches the lines, and registers each entry as a UDAF (two fields) or a Java UDF (three fields).

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
import org.apache.spark.sql.types.{DataType, DataTypes}

object UdfLoadUtils {

  // Config lines are cached after the first load; each line is "name,className[,returnType]"
  // (the returnType field is only needed for plain UDFs).
  @volatile private var configArray: Array[String] = _

  def udfRegister(configFilePath: String, spark: SparkSession): Unit = {
    if (configArray == null) {
      configArray = spark.sparkContext.textFile(configFilePath, 1).collect()
    }
    configArray.foreach { record =>
      println(s"register udf info : $record")
      record.split(",") match {
        // Two fields: a UDAF, instantiated reflectively by class name.
        case Array(udfName, className) =>
          spark.sqlContext.udf.register(udfName, getUDAFInstanceByClass(className))
        // Three fields: a Java UDF registered with an explicit return type.
        case Array(udfName, className, returnType) =>
          spark.sqlContext.udf.registerJava(udfName, className, parseDataType(returnType))
        case _ =>
          println(s"skipping malformed udf config line : $record")
      }
    }
  }

  private def parseDataType(returnType: String): DataType = returnType match {
    // Numeric types
    case "ByteType"      => DataTypes.ByteType
    case "ShortType"     => DataTypes.ShortType
    case "IntegerType"   => DataTypes.IntegerType
    case "LongType"      => DataTypes.LongType
    case "FloatType"     => DataTypes.FloatType
    case "DoubleType"    => DataTypes.DoubleType
    // String type
    case "StringType"    => DataTypes.StringType
    // Binary type
    case "BinaryType"    => DataTypes.BinaryType
    // Boolean type
    case "BooleanType"   => DataTypes.BooleanType
    // Datetime types
    case "TimestampType" => DataTypes.TimestampType
    case "DateType"      => DataTypes.DateType
    // DecimalType, ArrayType, MapType and StructType take parameters,
    // so a bare name in the config file cannot describe them.
    case other => throw new IllegalArgumentException(s"unsupported return type : $other")
  }

  def getUDAFInstanceByClass(className: String): UserDefinedAggregateFunction = {
    try {
      Class.forName(className).newInstance.asInstanceOf[UserDefinedAggregateFunction]
    } catch {
      case ex: Throwable =>
        println(s"instantiating $className failed, cause : ${ex.getCause}")
        ex.printStackTrace()
        null
    }
  }
}
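
For reference, minimal sketches of the classes such a config file could point at; the names (com.example.udaf.LongSumUDAF, com.example.udf.StrLenUDF) are hypothetical and only need to match the udf.config entries, and package declarations are omitted for brevity. The UDAF needs a no-arg constructor because getUDAFInstanceByClass calls Class.forName(...).newInstance; the UDF has to implement one of the org.apache.spark.sql.api.java.UDFn interfaces for registerJava.

import org.apache.spark.sql.Row
import org.apache.spark.sql.api.java.UDF1
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

// Hypothetical UDAF: sums a LongType column; registered by a two-field config line.
class LongSumUDAF extends UserDefinedAggregateFunction {
  override def inputSchema: StructType = StructType(StructField("value", LongType) :: Nil)
  override def bufferSchema: StructType = StructType(StructField("sum", LongType) :: Nil)
  override def dataType: DataType = LongType
  override def deterministic: Boolean = true
  override def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = 0L
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + input.getLong(0)
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
  override def evaluate(buffer: Row): Any = buffer.getLong(0)
}

// Hypothetical UDF: string length; registered by a three-field config line with returnType IntegerType.
class StrLenUDF extends UDF1[String, Integer] {
  override def call(s: String): Integer = if (s == null) 0 else s.length
}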


Where to hook registration into the thriftserver:


org.apache.spark.sql.hive.thriftserver.SparkSQLEnv#init (register the UDFs once, on the shared session, when the server starts):

    val configFilePath = sparkConf.getOption("spark.app.confpath")
      .getOrElse("/Users/l/spark/git/spark/sql/hive-thriftserver/conf-file/udf.config")
    println(s"configFilePath SparkSQLEnv : $configFilePath")
    UdfLoadUtils.udfRegister(configFilePath, sparkSession)

org.apache.spark.sql.hive.thriftserver.SparkSQLSessionManager#openSession (in multi-session mode, newSession() comes with a fresh function registry, so the UDFs have to be registered again for every new session):

    println(s"open session : single ${sqlContext.conf.hiveThriftServerSingleSession}")
    val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    if (!sqlContext.conf.hiveThriftServerSingleSession) {
      val configFilePath = ctx.sparkContext.conf.getOption("spark.app.confpath")
        .getOrElse("/Users/l/spark/git/spark/sql/hive-thriftserver/conf-file/udf.config")
      println(s"openSession configFilePath : $configFilePath")
      UdfLoadUtils.udfRegister(configFilePath, ctx.sparkSession)
    }

    ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
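
Once the server is up, a quick check over JDBC (port and table are placeholders; long_sum is the hypothetical UDAF from the sample config):

beeline -u jdbc:hive2://localhost:10000
0: jdbc:hive2://localhost:10000> SELECT long_sum(id) FROM some_table;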


Reposted from lingzhi007.iteye.com/blog/2393309