Implementing a Small SparkSQL Case Study: The Code (Part 1)

I. The Case
The case involves three tables: tbDate, tbStock, and tbStockDetail. Their field information is shown below:
(Figure: field information for the tbDate, tbStock, and tbStockDetail tables)
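The loading code below expects each table to be a comma-separated text file whose columns match the case-class fields in order. Hypothetical example lines (made up for illustration; the actual dataset may differ):

tbStock.txt:       BYSL00000893,ZHAO,2007-8-23
tbStockDetail.txt: BYSL00000893,1,FS527258160501,10,268.0,2680.0
tbDate.txt:        2007-8-23,200708,2007,8,23,4,34,3,8,2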
II. Requirements
1. For all orders, compute the number of sales orders and the total sales amount per year.
2. For all orders, compute the sales amount of the largest single order in each year.
3. For all orders, find the best-selling item of each year.

III. The Code

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

case class tbStock(ordernumber: String, locationid: String, dateid: String) extends Serializable
case class tbStockDetail(ordernumber: String, rownum: Int, itemid: String, number: Int, price: Double, amount: Double) extends Serializable
case class tbDate(dateid: String, years: Int, theyear: Int, month: Int, day: Int, weekday: Int, week: Int, quarter: Int, period: Int, halfmonth: Int) extends Serializable
object StockDemo {
  def main(args: Array[String]): Unit = {
    // Create the SparkSession
    val sparkConf = new SparkConf().setAppName("StockDemo").setMaster("local[*]")
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()
    import spark.implicits._
    // Read the source files into RDDs
    val tbStockRdd = spark.sparkContext.textFile("input\\stock\\tbStock.txt")
    val tbStockDetailRdd = spark.sparkContext.textFile("input\\stock\\tbStockDetail.txt")
    val tbDateRdd = spark.sparkContext.textFile("input\\stock\\tbDate.txt")
    // Parse each line and convert the RDDs to Datasets with toDS
    val tbStockDS = tbStockRdd.map(_.split(",")).map(attr => tbStock(attr(0), attr(1), attr(2))).toDS
    val tbStockDetailDS = tbStockDetailRdd.map(_.split(",")).map(attr => tbStockDetail(attr(0), attr(1).trim.toInt,
      attr(2), attr(3).trim.toInt, attr(4).trim.toDouble, attr(5).trim.toDouble)).toDS
    val tbDateDS = tbDateRdd.map(_.split(",")).map(attr => tbDate(attr(0), attr(1).trim.toInt, attr(2).trim.toInt,
      attr(3).trim.toInt, attr(4).trim.toInt, attr(5).trim.toInt, attr(6).trim.toInt, attr(7).trim.toInt,
      attr(8).trim.toInt, attr(9).trim.toInt)).toDS
    // Preview the data (optional)
    //tbStockDS.show()
    //tbStockDetailDS.show()
    //tbDateDS.show()
    // Register temporary views for SQL
    tbStockDS.createOrReplaceTempView("tbStock")
    tbDateDS.createOrReplaceTempView("tbDate")
    tbStockDetailDS.createOrReplaceTempView("tbStockDetail")
    /* 1. The number of sales orders and the total sales amount per year:
     * join the three tables, then COUNT(DISTINCT a.ordernumber) gives the order count
     * and SUM(b.amount) the total sales amount. */
    spark.sql("SELECT c.theyear,COUNT(a.ordernumber),SUM(b.amount) " +
      "FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid " +
      "GROUP BY c.theyear").show

    /* 2. The sales amount of the largest single order in each year:
     * sum each order's amount first, then take the maximum per year. */
    spark.sql("SELECT theyear, MAX(c.SumOfAmount) AS SumOfAmount FROM " +
      "(SELECT a.dateid, a.ordernumber, SUM(b.amount) AS SumOfAmount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber GROUP BY a.dateid, a.ordernumber ) c " +
      "JOIN tbDate d ON c.dateid = d.dateid GROUP BY theyear ORDER BY theyear DESC").show

    /* 3. The best-selling item of each year
     * (the item with the highest total sales amount in a year is that year's best-seller).
     * The commented-out query below is the full, correct approach: compute each item's
     * yearly total, then join against each year's maximum total to recover the item. */
/*    spark.sql("SELECT DISTINCT e.theyear, e.itemid, f.maxofamount FROM " +
      "(SELECT c.theyear, b.itemid, SUM(b.amount) AS sumofamount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) e " +
      "JOIN (SELECT d.theyear, MAX(d.sumofamount) AS maxofamount FROM " +
      "(SELECT c.theyear, b.itemid, SUM(b.amount) AS sumofamount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) d " +
      "GROUP BY d.theyear ) f ON e.theyear = f.theyear AND e.sumofamount = f.maxofamount " +
      "ORDER BY e.theyear").show*/

    /* Simplified version. Note: MAX(itemid) and MAX(d.SumOfAmount) are computed
     * independently within each year, so the itemid shown is merely the largest itemid,
     * not necessarily the best-selling item; prefer the query above or the
     * window-function sketch below. */
    spark.sql("SELECT d.theyear, MAX(itemid), MAX(d.SumOfAmount) AS MaxOfAmount FROM " +
      "(SELECT c.theyear, b.itemid, SUM(b.amount) AS SumOfAmount FROM tbStock a " +
      "JOIN tbStockDetail b ON a.ordernumber = b.ordernumber " +
      "JOIN tbDate c ON a.dateid = c.dateid GROUP BY c.theyear, b.itemid ) d GROUP BY d.theyear").show
    spark.stop()
  }
}
Source: blog.csdn.net/weixin_45568892/article/details/104396828