thriftserver

export SPARK_CONF_DIR=/home/yunzhi.lyz/spark/spark220/conf

spark-defaults.conf:

spark.yarn.jars    hdfs:/app/jars/*.jar    # jars live on HDFS; at each submit, jars under spark/jars that match the HDFS copies are taken from HDFS (no re-upload), with the driver-side jars taking priority
spark.app.confpath /udfconf/udf.config     # UDF config file on HDFS (custom property, read by the patched thriftserver below)
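
The config file holds one entry per line in the form name,className[,returnType]: two fields describe a UDAF, three fields a plain UDF. A hypothetical udf.config (the class names are placeholders, matching the example classes further down):

strlen,com.example.udf.StrLenUDF,IntegerType
long_sum,com.example.udaf.LongSumUDAF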

start-thriftserver.sh must be launched in client mode, and in client mode the driver-side classes have to be loaded via --driver-class-path $CLASSPATH:/home/...SNAPSHOT.jar.

Jars added with --driver-class-path end up after java -cp in the final java command; that is how the driver JVM gets to load them.

spark.driver.extraClassPath has no effect in client mode.
For the executors, the spark.executor.extraClassPath setting on the workers does the job; testing showed --jars works as well.
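
Putting the notes above together, the launch looks roughly like this; the jar path is a placeholder for your own build:

start-thriftserver.sh \
  --master yarn \
  --deploy-mode client \
  --driver-class-path $CLASSPATH:/path/to/your-udfs-SNAPSHOT.jar \
  --jars /path/to/your-udfs-SNAPSHOT.jar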

The registration itself lives in UdfLoadUtils: it reads the config file from spark.app.confpath once, caches the lines, and registers each entry as a UDAF (two fields) or a Java UDF (three fields).

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.UserDefinedAggregateFunction
import org.apache.spark.sql.types.{DataType, DataTypes}

object UdfLoadUtils {

  // Config lines are cached after the first load; each line is "name,className[,returnType]"
  // (the returnType field is only needed for plain UDFs).
  @volatile private var configArray: Array[String] = _

  def udfRegister(configFilePath: String, spark: SparkSession): Unit = {
    if (configArray == null) {
      configArray = spark.sparkContext.textFile(configFilePath, 1).collect()
    }
    configArray.foreach { record =>
      println(s"register udf info : $record")
      record.split(",") match {
        // Two fields: a UDAF, instantiated reflectively by class name.
        case Array(udfName, className) =>
          spark.sqlContext.udf.register(udfName, getUDAFInstanceByClass(className))
        // Three fields: a Java UDF registered with an explicit return type.
        case Array(udfName, className, returnType) =>
          spark.sqlContext.udf.registerJava(udfName, className, parseDataType(returnType))
        case _ =>
          println(s"skipping malformed udf config line : $record")
      }
    }
  }

  private def parseDataType(returnType: String): DataType = returnType match {
    // Numeric types
    case "ByteType"      => DataTypes.ByteType
    case "ShortType"     => DataTypes.ShortType
    case "IntegerType"   => DataTypes.IntegerType
    case "LongType"      => DataTypes.LongType
    case "FloatType"     => DataTypes.FloatType
    case "DoubleType"    => DataTypes.DoubleType
    // String type
    case "StringType"    => DataTypes.StringType
    // Binary type
    case "BinaryType"    => DataTypes.BinaryType
    // Boolean type
    case "BooleanType"   => DataTypes.BooleanType
    // Datetime types
    case "TimestampType" => DataTypes.TimestampType
    case "DateType"      => DataTypes.DateType
    // DecimalType, ArrayType, MapType and StructType take parameters,
    // so a bare name in the config file cannot describe them.
    case other => throw new IllegalArgumentException(s"unsupported return type : $other")
  }

  def getUDAFInstanceByClass(className: String): UserDefinedAggregateFunction = {
    try {
      Class.forName(className).newInstance.asInstanceOf[UserDefinedAggregateFunction]
    } catch {
      case ex: Throwable =>
        println(s"instantiating $className failed, cause : ${ex.getCause}")
        ex.printStackTrace()
        null
    }
  }
}
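
For reference, minimal sketches of the classes such a config file could point at; the names (com.example.udaf.LongSumUDAF, com.example.udf.StrLenUDF) are hypothetical and only need to match the udf.config entries, and package declarations are omitted for brevity. The UDAF needs a no-arg constructor because getUDAFInstanceByClass calls Class.forName(...).newInstance; the UDF has to implement one of the org.apache.spark.sql.api.java.UDFn interfaces for registerJava.

import org.apache.spark.sql.Row
import org.apache.spark.sql.api.java.UDF1
import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
import org.apache.spark.sql.types._

// Hypothetical UDAF: sums a LongType column; registered by a two-field config line.
class LongSumUDAF extends UserDefinedAggregateFunction {
  override def inputSchema: StructType = StructType(StructField("value", LongType) :: Nil)
  override def bufferSchema: StructType = StructType(StructField("sum", LongType) :: Nil)
  override def dataType: DataType = LongType
  override def deterministic: Boolean = true
  override def initialize(buffer: MutableAggregationBuffer): Unit = buffer(0) = 0L
  override def update(buffer: MutableAggregationBuffer, input: Row): Unit =
    if (!input.isNullAt(0)) buffer(0) = buffer.getLong(0) + input.getLong(0)
  override def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit =
    buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0)
  override def evaluate(buffer: Row): Any = buffer.getLong(0)
}

// Hypothetical UDF: string length; registered by a three-field config line with returnType IntegerType.
class StrLenUDF extends UDF1[String, Integer] {
  override def call(s: String): Integer = if (s == null) 0 else s.length
}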


Where to hook registration into the thriftserver:


org.apache.spark.sql.hive.thriftserver.SparkSQLEnv#init (register the UDFs once, on the shared session, when the server starts):

    val configFilePath = sparkConf.getOption("spark.app.confpath")
      .getOrElse("/Users/l/spark/git/spark/sql/hive-thriftserver/conf-file/udf.config")
    println(s"configFilePath SparkSQLEnv : $configFilePath")
    UdfLoadUtils.udfRegister(configFilePath, sparkSession)

org.apache.spark.sql.hive.thriftserver.SparkSQLSessionManager#openSession (in multi-session mode, newSession() comes with a fresh function registry, so the UDFs have to be registered again for every new session):

    println(s"open session : single ${sqlContext.conf.hiveThriftServerSingleSession}")
    val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    if (!sqlContext.conf.hiveThriftServerSingleSession) {
      val configFilePath = ctx.sparkContext.conf.getOption("spark.app.confpath")
        .getOrElse("/Users/l/spark/git/spark/sql/hive-thriftserver/conf-file/udf.config")
      println(s"openSession configFilePath : $configFilePath")
      UdfLoadUtils.udfRegister(configFilePath, ctx.sparkSession)
    }

    ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
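
Once the server is up, a quick check over JDBC (port and table are placeholders; long_sum is the hypothetical UDAF from the sample config):

beeline -u jdbc:hive2://localhost:10000
0: jdbc:hive2://localhost:10000> SELECT long_sum(id) FROM some_table;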


Reposted from lingzhi007.iteye.com/blog/2393309