SparkSQL: a small case study - code section (1)

I. Case Study

The case involves three tables: tbStock, tbStockDetail, and tbDate. Their fields are as follows (matching the case classes in the code below):

tbStock: ordernumber, locationid, dateid
tbStockDetail: ordernumber, rownum, itemid, number, price, amount
tbDate: dateid, years, theyear, month, day, weekday, week, quarter, period, halfmonth

II. Requirements

1. For all orders, compute the number of sales orders and the total sales amount per year.
2. For all orders, compute the sales amount of the largest single order per year.
3. For all orders, find the best-selling item per year.

III. Code

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

case class tbStock(ordernumber:String,locationid:String,dateid:String) extends Serializable
case class tbStockDetail(ordernumber:String, rownum:Int, itemid:String, number:Int, price:Double, amount:Double) extends Serializable
case class tbDate(dateid:String, years:Int, theyear:Int, month:Int, day:Int, weekday:Int, week:Int, quarter:Int, period:Int, halfmonth:Int) extends Serializable
object StockDemo {
  def main(args: Array[String]): Unit = {
    // Create the SparkSession object
    val sparkConf = new SparkConf().setAppName("StockDemo").setMaster("local[*]")
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()
    import spark.implicits._
    // Read the input files into RDDs (comma-separated text, one record per line)
    val tbStockRdd = spark.sparkContext.textFile("input\\stock\\tbStock.txt")
    val tbStockDetailRdd = spark.sparkContext.textFile("input\\stock\\tbStockDetail.txt")
    val tbDateRdd = spark.sparkContext.textFile("input\\stock\\tbDate.txt")
    // Parse each line into a case class instance, then convert to a Dataset with toDS
    val tbStockDS = tbStockRdd.map(_.split(",")).map(attr=>tbStock(attr(0),attr(1),attr(2) )).toDS
    val tbStockDetailDS = tbStockDetailRdd.map(_.split(",")).map(attr=> tbStockDetail(attr(0),attr(1).trim().toInt,attr(2),attr(3).trim().toInt,
      attr(4).trim().toDouble, attr(5).trim().toDouble)).toDS
    val tbDateDS = tbDateRdd.map(_.split(",")).map(attr=> tbDate(attr(0),attr(1).trim().toInt, attr(2).trim().toInt,attr(3).trim().toInt, attr(4).trim().toInt, 
      attr(5).trim().toInt, attr(6).trim().toInt, attr(7).trim().toInt, attr(8).trim().toInt, attr(9).trim().toInt)).toDS
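    // Alternative load path (a sketch, not used by the rest of the program):
    // recent Spark versions can read the comma-separated files directly into a
    // DataFrame (Spark 2.3+ accepts a DDL string as the schema), skipping the
    // RDD + case class step. Shown for tbStock; the other tables are analogous.
    val tbStockDF = spark.read
      .schema("ordernumber STRING, locationid STRING, dateid STRING")
      .csv("input\\stock\\tbStock.txt")
    //tbStockDF.show()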
    // Preview the Datasets (uncomment to inspect)
    //tbStockDS.show()
    //tbStockDetailDS.show()
    //tbDateDS.show()
    // Register temporary views for the SQL queries below
    tbStockDS.createOrReplaceTempView("tbStock")
    tbDateDS.createOrReplaceTempView("tbDate")
    tbStockDetailDS.createOrReplaceTempView("tbStockDetail")
    /* 1. For all orders, compute the number of sales orders and the total sales amount per year.
     * After joining the three tables, COUNT(DISTINCT a.ordernumber) gives the order count
     * (the detail join repeats each order once per line item, so plain COUNT would
     * over-count) and SUM(b.amount) gives the total sales amount. */
    spark.sql("SELECT c.theyear, COUNT(DISTINCT a.ordernumber), SUM(b.amount) " +
      "FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid " +
      "GROUP BY c.theyear").show

    /* 2. For all orders, compute the sales amount of the largest single order per year.
       Goal: for each year, report the amount of that year's biggest order. */
    spark.sql("SELECT theyear, MAX(c.SumOfAmount) AS SumOfAmount FROM " +
      "(SELECT a.dateid, a.ordernumber, SUM(b.amount) AS SumOfAmount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber GROUP BY a.dateid, a.ordernumber ) c " +
      "JOIN tbDate d ON c.dateid = d.dateid GROUP BY theyear ORDER BY theyear DESC").show

    /* 3. For all orders, find the best-selling item per year.
       Goal: the item whose sales amount is highest within a year is that year's best seller. */
    // Self-join approach: compute each year's (itemid, total amount), then join it
    // with each year's maximum amount to pick out the best-selling item.
    spark.sql("SELECT DISTINCT e.theyear, e.itemid, f.maxofamount FROM " +
      "(SELECT c.theyear, b.itemid, SUM(b.amount) AS sumofamount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) e " +
      "JOIN (SELECT d.theyear, MAX(d.sumofamount) AS maxofamount FROM " +
      "(SELECT c.theyear, b.itemid, SUM(b.amount) AS sumofamount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) d " +
      "GROUP BY d.theyear ) f ON e.theyear = f.theyear AND e.sumofamount = f.maxofamount " +
      "ORDER BY e.theyear").show

    /* The shortcut below is incorrect: MAX(itemid) and MAX(d.SumOfAmount) are
       aggregated independently, so the returned itemid is merely the year's
       lexicographically largest id, not the item that produced the maximum amount.
    spark.sql("SELECT d.theyear, MAX(itemid), MAX(d.SumOfAmount) AS MaxOfAmount FROM " +
      "(SELECT c.theyear, b.itemid, SUM(b.amount) AS SumOfAmount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) d GROUP BY d.theyear").show */
    spark.stop()
  }
}
Source: https://blog.csdn.net/weixin_45568892/article/details/104396828