# Submit SparkImportMcYanPan to YARN in client mode.
# ${dt} is the target partition date (yyyyMMdd) injected by the scheduler.
# Fixes: the original had no space before the backslash after --class's value,
# which glued "--deploy-mode" onto the class name; {JsonSpark.jar} braces would
# have been passed literally to spark-submit.
spark-submit \
  --master yarn \
  --deploy-mode client \
  --driver-memory 4G \
  --executor-memory 8G \
  --conf spark.target.date="${dt}" \
  --conf spark.ui.retainedTasks=50000 \
  --class com.cifi.zhyc.SparkImportMcYanPan \
  JsonSpark.jar
package com.cifi.zhyc
import java.io.{BufferedReader, InputStreamReader}
import java.nio.charset.StandardCharsets
import java.util.Properties

import org.apache.spark.sql.SparkSession
/*
Contents: simple spark-sql tests and jobs (both reflection-based and API-based implementations)
*/
/**
 * Loads the "city research – solar-term chart" dataset from a PostgreSQL
 * source table (JSON stored in an `extra` column), flattens the JSON fields
 * into named columns, and overwrites the matching dt partition of
 * `ods.zyc_mc_yanpan_jieqi`.
 *
 * Run via spark-submit; the target partition date is passed with
 * `--conf spark.target.date=yyyyMMdd` and defaults to today when absent
 * or malformed.
 */
object SparkImportMcYanPan {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .getOrCreate()
    val conf = spark.sparkContext.getConf

    // Partition date from --conf spark.target.date.
    // NOTE: SparkConf.get(key) throws NoSuchElementException when the key is
    // missing (it never returns null), so the original null-check fallback was
    // unreachable; getOption makes the fallback actually work. Also require
    // 8 *digits* (not just length 8) since the value is spliced into SQL below.
    val date = conf.getOption("spark.target.date")
      .filter(_.matches("\\d{8}"))
      .getOrElse(GeneralUtils.getDateStr("yyyyMMdd")) // format today as yyyyMMdd

    // Load DB connection settings from the classpath (place the file under resources/).
    val stream = Thread.currentThread().getContextClassLoader
      .getResourceAsStream("ref_file_table.properties")
    if (stream == null) {
      throw new IllegalStateException("ref_file_table.properties not found on classpath")
    }
    val properties = new Properties()
    // Read explicitly as UTF-8 so Chinese property values are not garbled,
    // and close the reader when done (the original leaked it).
    val reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))
    try properties.load(reader) finally reader.close()

    /* 3. City research – solar-term chart */
    import spark.implicits._
    import org.apache.spark.sql.functions._

    val data = spark.read.format("jdbc")
      .option("url", s"jdbc:postgresql://${properties.getProperty("pg.host")}:${properties.getProperty("pg.port")}/${properties.getProperty("pg.db")}")
      .option("dbtable", "collector.\"data-75ec1e84-971f-46aa-b9b4-1bb597714b77\"")
      .option("driver", "org.postgresql.Driver")
      .option("user", properties.getProperty("pg.user"))
      .option("password", properties.getProperty("pg.passwd"))
      .load()

    // All payload lives in the `extra` column as a JSON object, e.g.
    //   extra = {"a": 1, "b": 2}
    // Extract each field into its own column. The `$."..."` paths use the
    // source system's Chinese field names; do not translate them.
    data.select(
      get_json_object($"extra", "$.城市").as("city"),
      get_json_object($"extra", "$.日期").as("data_time"),
      get_json_object($"extra", "$.年同比价格涨幅").as("year_tb_jgzf"),
      get_json_object($"extra", "$.月环比价格涨幅").as("month_hb_jgzf"),
      get_json_object($"extra", "$.年同比价格涨幅_画图").as("year_tb_jgzf_map"),
      get_json_object($"extra", "$.节气").as("jieqi"),
      get_json_object($"extra", "$.是否预测").as("is_predict"),
      get_json_object($"extra", "$.求势指数_环比").as("qiushi_index_hb"),
      get_json_object($"extra", "$.冰山指数_环比").as("ice_index_hb"),
      get_json_object($"extra", "$.禧泰指数_环比").as("xitai_index_hb"),
      get_json_object($"extra", "$.统计局指数_环比").as("tjj_index_hb"),
      get_json_object($"extra", "$.总结").as("conclusion")
    ).createOrReplaceGlobalTempView("jieqi")

    // `date` is validated as \d{8} above, so this interpolation cannot inject SQL.
    spark.sql(
      s"insert overwrite table ods.zyc_mc_yanpan_jieqi partition(dt='$date') select * from global_temp.jieqi"
    )
    spark.stop()
  }
}
Adding this plugin to the pom enables fat-jar packaging via the assembly plugin; the build will produce an additional jar-with-dependencies artifact.
<!-- Produces an extra <artifact>-jar-with-dependencies.jar (fat jar).
     NOTE(review): no <version> is pinned and no <executions> binding is shown
     here; confirm a parent pom supplies them, otherwise the assembly must be
     run explicitly with `mvn package assembly:single`. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>